remove allocation for int parsing

This commit is contained in:
Robin Appelman 2020-12-08 20:13:50 +01:00
commit f7ae4135d6
6 changed files with 85 additions and 18 deletions

View file

@ -1,4 +1,5 @@
use crate::lexer::Token;
use crate::num::ParseIntError;
use crate::string::UnescapeError;
use crate::Value;
use logos::Span;
@ -8,7 +9,7 @@ use source_span::{
};
use std::error::Error;
use std::fmt::{self, Debug, Display};
use std::num::{ParseFloatError, ParseIntError};
use std::num::ParseFloatError;
use std::str::ParseBoolError;
use thiserror::Error;

View file

@ -20,6 +20,7 @@
//!
mod error;
mod lexer;
mod num;
mod parser;
mod string;

62
src/num.rs Normal file
View file

@ -0,0 +1,62 @@
use thiserror::Error;
#[derive(Debug, Error)]
pub enum ParseIntError {
#[error("cannot parse integer from empty string")]
Empty,
#[error("invalid digit found in string")]
InvalidDigit,
#[error("number too large or small to fit in target type")]
Overflow,
}
/// Parse an integer literal into an `i64` without allocating.
///
/// The literal is read in three steps:
///
/// 1. an optional leading `+` or `-` sign,
/// 2. an optional radix prefix: `0x` (hexadecimal), `0b` (binary), or a leading `0`
///    followed by at least one more character (octal); anything else is decimal,
/// 3. the digits of that radix, where `_` separators are skipped.
///
/// A digit run that consists only of `_` separators parses as `0`.
///
/// Mostly copied from the integer parsing in std.
///
/// # Errors
///
/// - [`ParseIntError::Empty`] if `src` is empty or nothing follows the sign / radix prefix.
/// - [`ParseIntError::InvalidDigit`] if a character is neither `_` nor a digit of the radix.
/// - [`ParseIntError::Overflow`] if the value does not fit in an `i64`.
pub fn parse_int(src: &str) -> Result<i64, ParseIntError> {
    // All valid digits are ASCII, so we iterate over the UTF-8 bytes and cast
    // them to chars. `to_digit()` returns `None` for anything that is not a
    // valid ASCII digit of the given radix, including every byte of a
    // multi-byte sequence.
    let bytes = src.as_bytes();
    let (is_negative, unsigned) = match bytes {
        [b'-', rest @ ..] => (true, rest),
        [b'+', rest @ ..] => (false, rest),
        _ => (false, bytes),
    };
    let (radix, digits): (u32, &[u8]) = match unsigned {
        [b'0', b'x', tail @ ..] => (16, tail),
        [b'0', b'b', tail @ ..] => (2, tail),
        [b'0', tail @ ..] if !tail.is_empty() => (8, tail),
        tail => (10, tail),
    };
    if digits.is_empty() {
        return Err(ParseIntError::Empty);
    }

    // Accumulate in the direction of the sign: building the magnitude as a
    // positive number and negating at the end would wrongly reject `i64::MIN`,
    // whose magnitude is one larger than `i64::MAX`.
    let mut result: i64 = 0;
    for &c in digits {
        if c == b'_' {
            continue;
        }
        let digit = (c as char)
            .to_digit(radix)
            .map(i64::from)
            .ok_or(ParseIntError::InvalidDigit)?;
        let shifted = result
            .checked_mul(i64::from(radix))
            .ok_or(ParseIntError::Overflow)?;
        let next = if is_negative {
            shifted.checked_sub(digit)
        } else {
            shifted.checked_add(digit)
        };
        result = next.ok_or(ParseIntError::Overflow)?;
    }
    Ok(result)
}

View file

@ -1,11 +1,12 @@
use crate::error::UnexpectedTokenError;
use crate::error::{ExpectToken, InvalidArrayKeyError, ParseError, ResultExt, SpannedError};
use crate::lexer::Token;
use crate::num::parse_int;
use crate::string::{unescape_double, unescape_single, UnescapeError};
use crate::{Key, Value};
use logos::{Lexer, Logos};
use std::collections::HashMap;
use std::num::{ParseFloatError, ParseIntError};
use std::num::ParseFloatError;
/// Parse a php literal
///
@ -79,18 +80,6 @@ fn parse_string(literal: &str) -> Result<String, UnescapeError> {
}
}
/// Parse an integer literal, ignoring `_` separators.
///
/// After the separators are removed, a `0x` prefix selects hexadecimal, a `0b`
/// prefix selects binary, and a leading `0` followed by at least one more
/// character selects octal. Everything else is handed to the decimal parser
/// unchanged.
fn parse_int(literal: &str) -> Result<i64, ParseIntError> {
    let cleaned = literal.replace('_', "");
    let (radix, digits) = if let Some(hex) = cleaned.strip_prefix("0x") {
        (16, hex)
    } else if let Some(bin) = cleaned.strip_prefix("0b") {
        (2, bin)
    } else {
        match cleaned.strip_prefix('0') {
            // A lone "0" is plain decimal zero, not an empty octal literal.
            Some(oct) if !oct.is_empty() => (8, oct),
            _ => (10, cleaned.as_str()),
        }
    };
    i64::from_str_radix(digits, radix)
}
fn parse_float(literal: &str) -> Result<f64, ParseFloatError> {
let stripped = literal.replace('_', "");
stripped.parse()

View file

@ -1,6 +1,6 @@
/// unescaping php string literals borrowed mostly from `escape8259`
use std::char::decode_utf16;
use std::iter::Peekable;
use std::iter::{once, Peekable};
#[derive(Debug, Clone, Eq, PartialEq)]
@ -41,10 +41,9 @@ impl UnescapeState {
match (self.stash, surrogate) {
(0, false) => {
// The std library only provides utf16 decode of an iterator,
// so to decode a single character we wrap it in an array.
// so to decode a single character we wrap it in a `once`.
// Hopefully the compiler will elide most of this extra work.
let words = [x];
match decode_utf16(words.iter().copied()).next() {
match decode_utf16(once(x)).next() {
Some(Ok(c)) => {
self.out.push(c);
}