mirror of
https://codeberg.org/icewind/php-literal-parser.git
synced 2026-06-03 18:44:07 +02:00
remove allocation for int parsing
This commit is contained in:
parent
2227135357
commit
f7ae4135d6
6 changed files with 85 additions and 18 deletions
15
benches/parse.rs
Normal file
15
benches/parse.rs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
#![feature(test)]
|
||||
|
||||
extern crate test;
|
||||
|
||||
use php_literal_parser::parse;
|
||||
use test::Bencher;
|
||||
|
||||
#[bench]
|
||||
fn perf_parse_int_basic(b: &mut Bencher) {
|
||||
let input = "12345676";
|
||||
|
||||
b.iter(|| {
|
||||
assert_eq!(parse(input).unwrap(), 12345676);
|
||||
});
|
||||
}
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
use crate::lexer::Token;
|
||||
use crate::num::ParseIntError;
|
||||
use crate::string::UnescapeError;
|
||||
use crate::Value;
|
||||
use logos::Span;
|
||||
|
|
@ -8,7 +9,7 @@ use source_span::{
|
|||
};
|
||||
use std::error::Error;
|
||||
use std::fmt::{self, Debug, Display};
|
||||
use std::num::{ParseFloatError, ParseIntError};
|
||||
use std::num::ParseFloatError;
|
||||
use std::str::ParseBoolError;
|
||||
use thiserror::Error;
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@
|
|||
//!
|
||||
mod error;
|
||||
mod lexer;
|
||||
mod num;
|
||||
mod parser;
|
||||
mod string;
|
||||
|
||||
|
|
|
|||
62
src/num.rs
Normal file
62
src/num.rs
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum ParseIntError {
|
||||
#[error("cannot parse integer from empty string")]
|
||||
Empty,
|
||||
#[error("invalid digit found in string")]
|
||||
InvalidDigit,
|
||||
#[error("number too large or small to fit in target type")]
|
||||
Overflow,
|
||||
}
|
||||
|
||||
/// Mostly copied from std
|
||||
pub fn parse_int(src: &str) -> Result<i64, ParseIntError> {
|
||||
if src.is_empty() {
|
||||
return Err(ParseIntError::Empty);
|
||||
}
|
||||
|
||||
// all valid digits are ascii, so we will just iterate over the utf8 bytes
|
||||
// and cast them to chars. .to_digit() will safely return None for anything
|
||||
// other than a valid ascii digit for the given radix, including the first-byte
|
||||
// of multi-byte sequences
|
||||
let src = src.as_bytes();
|
||||
|
||||
let (sign, digits) = match src[0] {
|
||||
b'+' => (1, &src[1..]),
|
||||
b'-' => (-1, &src[1..]),
|
||||
_ => (1, src),
|
||||
};
|
||||
|
||||
let (radix, digits) = match digits {
|
||||
[b'0', b'x', tail @ ..] => (16, tail),
|
||||
[b'0', b'b', tail @ ..] => (2, tail),
|
||||
[b'0', tail @ ..] if tail.len() > 0 => (8, tail),
|
||||
tail => (10, tail),
|
||||
};
|
||||
|
||||
if digits.is_empty() {
|
||||
return Err(ParseIntError::Empty);
|
||||
}
|
||||
|
||||
let mut result: i64 = 0;
|
||||
|
||||
// The number is positive
|
||||
for &c in digits {
|
||||
if c != b'_' {
|
||||
let x = match (c as char).to_digit(radix) {
|
||||
Some(x) => x,
|
||||
None => return Err(ParseIntError::InvalidDigit),
|
||||
};
|
||||
result = match result.checked_mul(radix as i64) {
|
||||
Some(result) => result,
|
||||
None => return Err(ParseIntError::Overflow),
|
||||
};
|
||||
result = match result.checked_add(x as i64) {
|
||||
Some(result) => result,
|
||||
None => return Err(ParseIntError::Overflow),
|
||||
};
|
||||
}
|
||||
}
|
||||
Ok(result * sign)
|
||||
}
|
||||
|
|
@ -1,11 +1,12 @@
|
|||
use crate::error::UnexpectedTokenError;
|
||||
use crate::error::{ExpectToken, InvalidArrayKeyError, ParseError, ResultExt, SpannedError};
|
||||
use crate::lexer::Token;
|
||||
use crate::num::parse_int;
|
||||
use crate::string::{unescape_double, unescape_single, UnescapeError};
|
||||
use crate::{Key, Value};
|
||||
use logos::{Lexer, Logos};
|
||||
use std::collections::HashMap;
|
||||
use std::num::{ParseFloatError, ParseIntError};
|
||||
use std::num::ParseFloatError;
|
||||
|
||||
/// Parse a php literal
|
||||
///
|
||||
|
|
@ -79,18 +80,6 @@ fn parse_string(literal: &str) -> Result<String, UnescapeError> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Parse an integer literal by stripping `_` separators and handing the remaining
/// digits to `i64::from_str_radix` with the radix implied by the prefix
/// (`0x` → 16, `0b` → 2, leading `0` followed by more digits → 8, otherwise 10).
fn parse_int(literal: &str) -> Result<i64, ParseIntError> {
    // Drop the digit separators up front so the std parser only sees digits.
    let cleaned: String = literal.chars().filter(|&ch| ch != '_').collect();

    let (radix, digits) = if let Some(hex) = cleaned.strip_prefix("0x") {
        (16, hex)
    } else if let Some(bin) = cleaned.strip_prefix("0b") {
        (2, bin)
    } else {
        match cleaned.strip_prefix('0') {
            // A lone `0` is plain decimal zero, not an empty octal literal.
            Some(oct) if !oct.is_empty() => (8, oct),
            _ => (10, cleaned.as_str()),
        }
    };

    i64::from_str_radix(digits, radix)
}
|
||||
|
||||
fn parse_float(literal: &str) -> Result<f64, ParseFloatError> {
|
||||
let stripped = literal.replace('_', "");
|
||||
stripped.parse()
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
/// unescaping php string literals borrowed mostly from `escape8259`
|
||||
use std::char::decode_utf16;
|
||||
use std::iter::Peekable;
|
||||
use std::iter::{once, Peekable};
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
|
||||
|
|
@ -41,10 +41,9 @@ impl UnescapeState {
|
|||
match (self.stash, surrogate) {
|
||||
(0, false) => {
|
||||
// The std library only provides utf16 decode of an iterator,
|
||||
// so to decode a single character we wrap it in an array.
|
||||
// so to decode a single character we wrap it in a `once`.
|
||||
// Hopefully the compiler will elide most of this extra work.
|
||||
let words = [x];
|
||||
match decode_utf16(words.iter().copied()).next() {
|
||||
match decode_utf16(once(x)).next() {
|
||||
Some(Ok(c)) => {
|
||||
self.out.push(c);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue