mirror of
https://codeberg.org/icewind/php-literal-parser.git
synced 2026-06-03 18:44:07 +02:00
minor string code cleanup
This commit is contained in:
parent
e9e13df444
commit
f1c1f99328
2 changed files with 161 additions and 125 deletions
|
|
@ -2,7 +2,7 @@ use crate::error::UnexpectedTokenError;
|
|||
use crate::error::{ExpectToken, InvalidArrayKeyError, ParseError, ResultExt, SpannedError};
|
||||
use crate::lexer::Token;
|
||||
use crate::num::parse_int;
|
||||
use crate::string::{unescape_double, unescape_single, UnescapeError};
|
||||
use crate::string::parse_string;
|
||||
use crate::{Key, Value};
|
||||
use logos::{Lexer, Logos};
|
||||
use std::collections::HashMap;
|
||||
|
|
@ -69,17 +69,6 @@ pub fn parse_lexer<'source>(
|
|||
Ok(value)
|
||||
}
|
||||
|
||||
fn parse_string(literal: &str) -> Result<String, UnescapeError> {
|
||||
let single_quote = literal.bytes().next().unwrap() == b'\'';
|
||||
let inner = &literal[1..(literal.len()) - 1];
|
||||
|
||||
if single_quote {
|
||||
unescape_single(inner)
|
||||
} else {
|
||||
unescape_double(inner)
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_float(literal: &str) -> Result<f64, ParseFloatError> {
|
||||
let stripped = literal.replace('_', "");
|
||||
stripped.parse()
|
||||
|
|
|
|||
167
src/string.rs
167
src/string.rs
|
|
@ -12,10 +12,6 @@ struct UnescapeState {
|
|||
}
|
||||
|
||||
impl UnescapeState {
|
||||
fn new() -> UnescapeState {
|
||||
UnescapeState { out: Vec::new() }
|
||||
}
|
||||
|
||||
fn with_capacity(capacity: usize) -> UnescapeState {
|
||||
UnescapeState {
|
||||
out: Vec::with_capacity(capacity),
|
||||
|
|
@ -71,10 +67,14 @@ fn parse_u32(
|
|||
Ok(result)
|
||||
}
|
||||
|
||||
fn handle_single_escape<'a>(
|
||||
bytes: &'a [u8],
|
||||
state: &mut UnescapeState,
|
||||
) -> UnescapeResult<&'a [u8]> {
|
||||
trait EscapedString {
|
||||
fn handle_escape<'a>(bytes: &'a [u8], state: &mut UnescapeState) -> UnescapeResult<&'a [u8]>;
|
||||
}
|
||||
|
||||
struct SingleQuoteString;
|
||||
|
||||
impl EscapedString for SingleQuoteString {
|
||||
fn handle_escape<'a>(bytes: &'a [u8], state: &mut UnescapeState) -> UnescapeResult<&'a [u8]> {
|
||||
let mut ins = PeekableBytes::new(bytes);
|
||||
debug_assert_eq!(ins.next(), Some(b'\\'));
|
||||
match ins.next() {
|
||||
|
|
@ -90,27 +90,13 @@ fn handle_single_escape<'a>(
|
|||
},
|
||||
}
|
||||
Ok(ins.as_slice())
|
||||
}
|
||||
|
||||
/// Un-escape a string, following php single quote rules
|
||||
pub fn unescape_single(s: &str) -> UnescapeResult<String> {
|
||||
let mut state = UnescapeState::with_capacity(s.len());
|
||||
let mut bytes = s.as_bytes();
|
||||
while let Some(escape_index) = memchr::memchr(b'\\', bytes) {
|
||||
state.push_slice(&bytes[0..escape_index]);
|
||||
bytes = &bytes[escape_index..];
|
||||
bytes = handle_single_escape(bytes, &mut state)?;
|
||||
}
|
||||
|
||||
state.push_slice(&bytes[0..]);
|
||||
|
||||
Ok(state.finalize())
|
||||
}
|
||||
|
||||
fn handle_double_escape<'a>(
|
||||
bytes: &'a [u8],
|
||||
state: &mut UnescapeState,
|
||||
) -> UnescapeResult<&'a [u8]> {
|
||||
struct DoubleQuoteString;
|
||||
|
||||
impl EscapedString for DoubleQuoteString {
|
||||
fn handle_escape<'a>(bytes: &'a [u8], state: &mut UnescapeState) -> UnescapeResult<&'a [u8]> {
|
||||
let mut ins = PeekableBytes::new(bytes);
|
||||
debug_assert_eq!(ins.next(), Some(b'\\'));
|
||||
match ins.next() {
|
||||
|
|
@ -148,7 +134,8 @@ fn handle_double_escape<'a>(
|
|||
}
|
||||
},
|
||||
b'0'..=b'7' => {
|
||||
let val = parse_u32(&mut ins, 8, (d as char).to_digit(8).unwrap(), Some(3))?;
|
||||
let val =
|
||||
parse_u32(&mut ins, 8, (d as char).to_digit(8).unwrap(), Some(3))?;
|
||||
state.push_raw(val)?;
|
||||
}
|
||||
_ => {
|
||||
|
|
@ -159,16 +146,25 @@ fn handle_double_escape<'a>(
|
|||
}
|
||||
}
|
||||
Ok(ins.as_slice())
|
||||
}
|
||||
}
|
||||
|
||||
/// Un-escape a string, following php double quote rules
|
||||
pub fn unescape_double(s: &str) -> UnescapeResult<String> {
|
||||
pub fn parse_string(literal: &str) -> Result<String, UnescapeError> {
|
||||
let inner = &literal[1..(literal.len()) - 1];
|
||||
if literal.bytes().next().unwrap() == b'\'' {
|
||||
unescape::<SingleQuoteString>(inner)
|
||||
} else {
|
||||
unescape::<DoubleQuoteString>(inner)
|
||||
}
|
||||
}
|
||||
|
||||
fn unescape<S: EscapedString>(s: &str) -> UnescapeResult<String> {
|
||||
let mut state = UnescapeState::with_capacity(s.len());
|
||||
let mut bytes = s.as_bytes();
|
||||
while let Some(escape_index) = memchr::memchr(b'\\', bytes) {
|
||||
state.push_slice(&bytes[0..escape_index]);
|
||||
bytes = &bytes[escape_index..];
|
||||
bytes = handle_double_escape(bytes, &mut state)?;
|
||||
bytes = S::handle_escape(bytes, &mut state)?;
|
||||
}
|
||||
|
||||
state.push_slice(&bytes[0..]);
|
||||
|
|
@ -211,44 +207,95 @@ mod tests {
|
|||
|
||||
#[test]
|
||||
fn test_unescape_single() {
|
||||
assert_eq!(unescape_single(&r#"abc"#), Ok("abc".into()));
|
||||
assert_eq!(unescape_single(&r#"ab\nc"#), Ok("ab\\nc".into()));
|
||||
assert_eq!(unescape_single(r#"ab\zc"#), Ok("ab\\zc".into()));
|
||||
assert_eq!(unescape_single(r#" \"abc\" "#), Ok(" \\\"abc\\\" ".into()));
|
||||
assert_eq!(unescape_single(r#"𝄞"#), Ok("𝄞".into()));
|
||||
assert_eq!(unescape_single(r#"\𝄞"#), Ok("\\𝄞".into()));
|
||||
assert_eq!(unescape::<SingleQuoteString>(&r#"abc"#), Ok("abc".into()));
|
||||
assert_eq!(
|
||||
unescape_single(r#"\xD834\xDD1E"#),
|
||||
unescape::<SingleQuoteString>(&r#"ab\nc"#),
|
||||
Ok("ab\\nc".into())
|
||||
);
|
||||
assert_eq!(
|
||||
unescape::<SingleQuoteString>(r#"ab\zc"#),
|
||||
Ok("ab\\zc".into())
|
||||
);
|
||||
assert_eq!(
|
||||
unescape::<SingleQuoteString>(r#" \"abc\" "#),
|
||||
Ok(" \\\"abc\\\" ".into())
|
||||
);
|
||||
assert_eq!(unescape::<SingleQuoteString>(r#"𝄞"#), Ok("𝄞".into()));
|
||||
assert_eq!(unescape::<SingleQuoteString>(r#"\𝄞"#), Ok("\\𝄞".into()));
|
||||
assert_eq!(
|
||||
unescape::<SingleQuoteString>(r#"\xD834\xDD1E"#),
|
||||
Ok("\\xD834\\xDD1E".into())
|
||||
);
|
||||
assert_eq!(unescape_single(r#"\xD834"#), Ok("\\xD834".into()));
|
||||
assert_eq!(unescape_single(r#"\xDD1E"#), Ok("\\xDD1E".into()));
|
||||
assert_eq!(unescape_single("\t"), Ok("\t".into()));
|
||||
assert_eq!(
|
||||
unescape::<SingleQuoteString>(r#"\xD834"#),
|
||||
Ok("\\xD834".into())
|
||||
);
|
||||
assert_eq!(
|
||||
unescape::<SingleQuoteString>(r#"\xDD1E"#),
|
||||
Ok("\\xDD1E".into())
|
||||
);
|
||||
assert_eq!(unescape::<SingleQuoteString>("\t"), Ok("\t".into()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_unescape_double() {
|
||||
assert_eq!(unescape_double(&r#"abc"#), Ok("abc".into()));
|
||||
assert_eq!(unescape_double(&r#"ab\nc"#), Ok("ab\nc".into()));
|
||||
assert_eq!(unescape_double(r#"ab\zc"#), Ok("ab\\zc".into()));
|
||||
assert_eq!(unescape_double(r#" \"abc\" "#), Ok(" \"abc\" ".into()));
|
||||
assert_eq!(unescape_double(r#"𝄞"#), Ok("𝄞".into()));
|
||||
assert_eq!(unescape_double(r#"\𝄞"#), Ok("\\𝄞".into()));
|
||||
assert_eq!(unescape_double(r#"\u{1D11E}"#), Ok("𝄞".into()));
|
||||
assert_eq!(unescape_double(r#"\xD834"#), Ok("\u{D8}34".into()));
|
||||
assert_eq!(unescape_double(r#"\xDD1E"#), Ok("\u{DD}1E".into()));
|
||||
assert_eq!(unescape_double(r#"\xD"#), Ok("\u{D}".into()));
|
||||
assert_eq!(unescape_double("\t"), Ok("\t".into()));
|
||||
assert_eq!(unescape_double(r#"\u{D834"#), Err(UnescapeError));
|
||||
assert_eq!(unescape_double(r#"\uD834"#), Ok("\\uD834".into()));
|
||||
assert_eq!(unescape_double(r#"\u"#), Ok("\\u".into()));
|
||||
assert_eq!(unescape_double(r#"\47foo"#), Ok("'foo".into()));
|
||||
assert_eq!(unescape_double(r#"\48foo"#), Ok("\u{4}8foo".into()));
|
||||
assert_eq!(unescape_double(r#"\87foo"#), Ok("\\87foo".into()));
|
||||
|
||||
assert_eq!(unescape_double(r#"\u{999999}"#), Err(UnescapeError));
|
||||
assert_eq!(unescape::<DoubleQuoteString>(&r#"abc"#), Ok("abc".into()));
|
||||
assert_eq!(
|
||||
unescape_double(r#"\u{999999999999999999}"#),
|
||||
unescape::<DoubleQuoteString>(&r#"ab\nc"#),
|
||||
Ok("ab\nc".into())
|
||||
);
|
||||
assert_eq!(
|
||||
unescape::<DoubleQuoteString>(r#"ab\zc"#),
|
||||
Ok("ab\\zc".into())
|
||||
);
|
||||
assert_eq!(
|
||||
unescape::<DoubleQuoteString>(r#" \"abc\" "#),
|
||||
Ok(" \"abc\" ".into())
|
||||
);
|
||||
assert_eq!(unescape::<DoubleQuoteString>(r#"𝄞"#), Ok("𝄞".into()));
|
||||
assert_eq!(unescape::<DoubleQuoteString>(r#"\𝄞"#), Ok("\\𝄞".into()));
|
||||
assert_eq!(
|
||||
unescape::<DoubleQuoteString>(r#"\u{1D11E}"#),
|
||||
Ok("𝄞".into())
|
||||
);
|
||||
assert_eq!(
|
||||
unescape::<DoubleQuoteString>(r#"\xD834"#),
|
||||
Ok("\u{D8}34".into())
|
||||
);
|
||||
assert_eq!(
|
||||
unescape::<DoubleQuoteString>(r#"\xDD1E"#),
|
||||
Ok("\u{DD}1E".into())
|
||||
);
|
||||
assert_eq!(unescape::<DoubleQuoteString>(r#"\xD"#), Ok("\u{D}".into()));
|
||||
assert_eq!(unescape::<DoubleQuoteString>("\t"), Ok("\t".into()));
|
||||
assert_eq!(
|
||||
unescape::<DoubleQuoteString>(r#"\u{D834"#),
|
||||
Err(UnescapeError)
|
||||
);
|
||||
assert_eq!(
|
||||
unescape::<DoubleQuoteString>(r#"\uD834"#),
|
||||
Ok("\\uD834".into())
|
||||
);
|
||||
assert_eq!(unescape::<DoubleQuoteString>(r#"\u"#), Ok("\\u".into()));
|
||||
assert_eq!(
|
||||
unescape::<DoubleQuoteString>(r#"\47foo"#),
|
||||
Ok("'foo".into())
|
||||
);
|
||||
assert_eq!(
|
||||
unescape::<DoubleQuoteString>(r#"\48foo"#),
|
||||
Ok("\u{4}8foo".into())
|
||||
);
|
||||
assert_eq!(
|
||||
unescape::<DoubleQuoteString>(r#"\87foo"#),
|
||||
Ok("\\87foo".into())
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
unescape::<DoubleQuoteString>(r#"\u{999999}"#),
|
||||
Err(UnescapeError)
|
||||
);
|
||||
assert_eq!(
|
||||
unescape::<DoubleQuoteString>(r#"\u{999999999999999999}"#),
|
||||
Err(UnescapeError)
|
||||
);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue