also optimize single quoted strings

This commit is contained in:
Robin Appelman 2020-12-08 21:36:08 +01:00
commit e9e13df444
2 changed files with 54 additions and 24 deletions

View file

@ -15,7 +15,7 @@ fn perf_parse_int_basic(b: &mut Bencher) {
} }
#[bench] #[bench]
fn perf_str_basic(b: &mut Bencher) { fn perf_str_double_basic(b: &mut Bencher) {
let input = r#""aut dolores excepturi rerum est velit ad natus eveniet quo tenetur et fugiat sit velit ipsam nesciunt sint et architecto""#; let input = r#""aut dolores excepturi rerum est velit ad natus eveniet quo tenetur et fugiat sit velit ipsam nesciunt sint et architecto""#;
b.iter(|| { b.iter(|| {
@ -24,10 +24,28 @@ fn perf_str_basic(b: &mut Bencher) {
} }
#[bench] #[bench]
fn perf_str_escape(b: &mut Bencher) { fn perf_str_double_escape(b: &mut Bencher) {
let input = r#""aut dolores excepturi rerum est velit ad natus \"eveniet\" quo tenetur et fugiat sit velit ipsam nesciunt sint et architecto""#; let input = r#""aut dolores excepturi rerum est velit ad natus \"eveniet\" quo tenetur et fugiat sit velit ipsam nesciunt sint et architecto""#;
b.iter(|| { b.iter(|| {
assert!(parse(input).unwrap().is_string()); assert!(parse(input).unwrap().is_string());
}); });
} }
/// Benchmarks parsing a single-quoted string literal that contains no backslashes.
#[bench]
fn perf_str_single_basic(b: &mut Bencher) {
// The raw string keeps the surrounding `'` characters, so the parser sees a quoted literal.
let input = r#"'aut dolores excepturi rerum est velit ad natus eveniet quo tenetur et fugiat sit velit ipsam nesciunt sint et architecto'"#;
b.iter(|| {
// Asserting on the result keeps the parsed value in use inside the timed closure.
assert!(parse(input).unwrap().is_string());
});
}
/// Benchmarks parsing a single-quoted string literal that contains backslash sequences.
#[bench]
fn perf_str_single_escape(b: &mut Bencher) {
// NOTE(review): the backslash sequences used here are `\"`. The single-quote
// unescape logic only collapses `\\` and `\'`, so these take the
// "keep the backslash" branch — confirm that is the path this benchmark is
// meant to measure, rather than `\'`.
let input = r#"'aut dolores excepturi rerum est velit ad natus \"eveniet\" quo tenetur et fugiat sit velit ipsam nesciunt sint et architecto'"#;
b.iter(|| {
// Asserting on the result keeps the parsed value in use inside the timed closure.
assert!(parse(input).unwrap().is_string());
});
}

View file

@ -71,34 +71,46 @@ fn parse_u32(
Ok(result) Ok(result)
} }
/// Un-escape a string, following php single quote rules fn handle_single_escape<'a>(
pub fn unescape_single(s: &str) -> UnescapeResult<String> { bytes: &'a [u8],
let mut state = UnescapeState::new(); state: &mut UnescapeState,
let mut ins = s.chars(); ) -> UnescapeResult<&'a [u8]> {
let mut ins = PeekableBytes::new(bytes);
while let Some(c) = ins.next() { debug_assert_eq!(ins.next(), Some(b'\\'));
if c == '\\' {
match ins.next() { match ins.next() {
None => { None => {
return Err(UnescapeError); return Err(UnescapeError);
} }
Some(d) => match d { Some(d) => match d {
'\\' | '\'' => state.push_char(d), b'\\' | b'\'' => state.push_u8(d),
_ => { _ => {
state.push_char('\\'); state.push_u8(b'\\');
state.push_char(d) state.push_u8(d)
} }
}, },
} }
} else { Ok(ins.as_slice())
state.push_char(c);
} }
/// Un-escape a string, following PHP single quote rules.
///
/// Only `\\` and `\'` are collapsed to the escaped character; any other
/// backslash sequence is copied through unchanged, backslash included.
///
/// # Errors
///
/// Returns `UnescapeError` when a backslash is the last byte of the input,
/// i.e. there is no character left for it to escape.
pub fn unescape_single(s: &str) -> UnescapeResult<String> {
// Presumably pre-sizes the output buffer to the input length — confirm
// against `UnescapeState::with_capacity`.
let mut state = UnescapeState::with_capacity(s.len());
let mut bytes = s.as_bytes();
// Jump from backslash to backslash instead of inspecting every byte.
while let Some(escape_index) = memchr::memchr(b'\\', bytes) {
// Copy the backslash-free run in front of the escape in one call.
state.push_slice(&bytes[0..escape_index]);
bytes = &bytes[escape_index..];
// `bytes` now starts at the backslash; the helper pushes the result of
// the escape and hands back the slice that follows it.
bytes = handle_single_escape(bytes, &mut state)?;
}
// No backslash remains, so the rest of the input is copied as-is.
state.push_slice(&bytes[0..]);
Ok(state.finalize()) Ok(state.finalize())
} }
fn handle_escape<'a>(bytes: &'a [u8], state: &mut UnescapeState) -> UnescapeResult<&'a [u8]> { fn handle_double_escape<'a>(
bytes: &'a [u8],
state: &mut UnescapeState,
) -> UnescapeResult<&'a [u8]> {
let mut ins = PeekableBytes::new(bytes); let mut ins = PeekableBytes::new(bytes);
debug_assert_eq!(ins.next(), Some(b'\\')); debug_assert_eq!(ins.next(), Some(b'\\'));
match ins.next() { match ins.next() {
@ -156,7 +168,7 @@ pub fn unescape_double(s: &str) -> UnescapeResult<String> {
while let Some(escape_index) = memchr::memchr(b'\\', bytes) { while let Some(escape_index) = memchr::memchr(b'\\', bytes) {
state.push_slice(&bytes[0..escape_index]); state.push_slice(&bytes[0..escape_index]);
bytes = &bytes[escape_index..]; bytes = &bytes[escape_index..];
bytes = handle_escape(bytes, &mut state)?; bytes = handle_double_escape(bytes, &mut state)?;
} }
state.push_slice(&bytes[0..]); state.push_slice(&bytes[0..]);