extend integer literal support

This commit is contained in:
Robin Appelman 2020-12-07 21:14:47 +01:00
commit a19ce28d07
2 changed files with 44 additions and 2 deletions

View file

@ -24,7 +24,7 @@ pub enum Token {
LiteralString, LiteralString,
#[regex("-?[0-9]*\\.[0-9]+")] #[regex("-?[0-9]*\\.[0-9]+")]
Float, Float,
#[regex("-?[0-9]+")] #[regex("-?(0|[1-9][0-9]*(_[0-9]+)*|0[xX][0-9a-fA-F]+(_[0-9a-fA-F]+)*|0[0-7]+(_[0-7]+)*|0[bB][01]+(_[01]+)*)")]
Integer, Integer,
#[error] #[error]
#[regex(r"[ \t\n\f]+", logos::skip)] #[regex(r"[ \t\n\f]+", logos::skip)]
@ -117,3 +117,27 @@ fn test_lex() {
assert_eq!(lex.next(), None); assert_eq!(lex.next(), None);
} }
#[test]
fn test_lex_int() {
    // One literal per supported notation, separated by commas:
    // zero, decimal, hexadecimal, octal, binary, and `_`-separated decimal.
    let source = r###"0,123,0x123,0123,0b111,12_34_56"###;
    let mut lex = Token::lexer(source);

    let literal_count = 6;
    for index in 0..literal_count {
        // Every literal after the first is preceded by a comma token.
        if index > 0 {
            assert_eq!(lex.next(), Some(Token::Comma));
        }
        assert_eq!(lex.next(), Some(Token::Integer));
    }

    // The input must be fully consumed.
    assert_eq!(lex.next(), None);
}

View file

@ -5,6 +5,7 @@ use crate::string::{unescape_double, unescape_single, UnescapeError};
use crate::{Key, Value}; use crate::{Key, Value};
use logos::{Lexer, Logos}; use logos::{Lexer, Logos};
use std::collections::HashMap; use std::collections::HashMap;
use std::num::ParseIntError;
/// Parse a php literal /// Parse a php literal
/// ///
@ -47,7 +48,7 @@ pub fn parse_lexer<'source>(
.with_span(lexer.span(), source)?; .with_span(lexer.span(), source)?;
let value = match token { let value = match token {
Token::Bool => Value::Bool(lexer.slice().parse().with_span(lexer.span(), source)?), Token::Bool => Value::Bool(lexer.slice().parse().with_span(lexer.span(), source)?),
Token::Integer => Value::Int(lexer.slice().parse().with_span(lexer.span(), source)?), Token::Integer => Value::Int(parse_int(lexer.slice()).with_span(lexer.span(), source)?),
Token::Float => Value::Float(lexer.slice().parse().with_span(lexer.span(), source)?), Token::Float => Value::Float(lexer.slice().parse().with_span(lexer.span(), source)?),
Token::LiteralString => { Token::LiteralString => {
Value::String(parse_string(lexer.slice()).with_span(lexer.span(), source)?) Value::String(parse_string(lexer.slice()).with_span(lexer.span(), source)?)
@ -72,6 +73,18 @@ fn parse_string(literal: &str) -> Result<String, UnescapeError> {
} }
} }
/// Parse the text of an integer literal into an `i64`.
///
/// Supported forms (all may contain `_` digit separators and an optional
/// leading `-`):
///
/// * `0x…` / `0X…` — hexadecimal
/// * `0b…` / `0B…` — binary
/// * `0` followed by further digits — octal
/// * anything else — decimal
///
/// # Errors
///
/// Returns a [`ParseIntError`] when the digits are empty, contain a character
/// that is not valid for the detected radix, or the value does not fit in an
/// `i64`.
fn parse_int(literal: &str) -> Result<i64, ParseIntError> {
    let stripped = literal.replace('_', "");

    // Split off the sign first so the radix prefix is recognised for negative
    // literals as well (e.g. `-0x1A`, `-0432`).
    let (negative, magnitude) = match stripped.strip_prefix('-') {
        Some(rest) => (true, rest),
        None => (false, stripped.as_str()),
    };

    let (radix, digits) = if let Some(hex) = magnitude
        .strip_prefix("0x")
        .or_else(|| magnitude.strip_prefix("0X"))
    {
        (16, hex)
    } else if let Some(bin) = magnitude
        .strip_prefix("0b")
        .or_else(|| magnitude.strip_prefix("0B"))
    {
        (2, bin)
    } else if magnitude.len() > 1 && magnitude.starts_with('0') {
        // A lone `0` is plain decimal zero; only `0` plus more digits is octal.
        (8, &magnitude[1..])
    } else {
        (10, magnitude)
    };

    if negative {
        // Re-attach the sign before converting instead of negating afterwards,
        // so that `i64::MIN` (whose magnitude overflows `i64`) still parses.
        i64::from_str_radix(&format!("-{}", digits), radix)
    } else {
        i64::from_str_radix(digits, radix)
    }
}
#[derive(Default)] #[derive(Default)]
struct ArrayBuilder { struct ArrayBuilder {
next_int_key: i64, next_int_key: i64,
@ -264,4 +277,9 @@ fn test_parse() {
}), }),
parse(r#"["foo" => true, "nested" => ['foo' => null]]"#).unwrap() parse(r#"["foo" => true, "nested" => ['foo' => null]]"#).unwrap()
); );
assert_eq!(Value::Int(-432), parse(r#"-432"#).unwrap());
assert_eq!(Value::Int(282), parse(r#"0432"#).unwrap());
assert_eq!(Value::Int(26), parse(r#"0x1A"#).unwrap());
assert_eq!(Value::Int(3), parse(r#"0b11"#).unwrap());
assert_eq!(Value::Int(12345), parse(r#"12_34_5"#).unwrap());
} }