extend integer literal support

This commit is contained in:
Robin Appelman 2020-12-07 21:14:47 +01:00
commit a19ce28d07
2 changed files with 44 additions and 2 deletions

View file

@ -24,7 +24,7 @@ pub enum Token {
LiteralString,
#[regex("-?[0-9]*\\.[0-9]+")]
Float,
#[regex("-?[0-9]+")]
#[regex("-?(0|[1-9][0-9]*(_[0-9]+)*|0[xX][0-9a-fA-F]+(_[0-9a-fA-F]+)*|0[0-7]+(_[0-7]+)*|0[bB][01]+(_[01]+)*)")]
Integer,
#[error]
#[regex(r"[ \t\n\f]+", logos::skip)]
@ -117,3 +117,27 @@ fn test_lex() {
assert_eq!(lex.next(), None);
}
/// Every supported integer literal form lexes to a single `Integer` token,
/// with the separating commas lexed as `Comma` and nothing left over.
#[test]
fn test_lex_int() {
    let mut tokens = Token::lexer(r###"0,123,0x123,0123,0b111,12_34_56"###);

    // One `Integer` per literal in the input, interleaved with the commas.
    let expected = [
        Token::Integer, // 0
        Token::Comma,
        Token::Integer, // 123
        Token::Comma,
        Token::Integer, // 0x123
        Token::Comma,
        Token::Integer, // 0123
        Token::Comma,
        Token::Integer, // 0b111
        Token::Comma,
        Token::Integer, // 12_34_56
    ];
    for want in expected.iter() {
        assert_eq!(tokens.next().as_ref(), Some(want));
    }

    // The lexer must be exhausted once every literal has been consumed.
    assert_eq!(tokens.next(), None);
}

View file

@ -5,6 +5,7 @@ use crate::string::{unescape_double, unescape_single, UnescapeError};
use crate::{Key, Value};
use logos::{Lexer, Logos};
use std::collections::HashMap;
use std::num::ParseIntError;
/// Parse a php literal
///
@ -47,7 +48,7 @@ pub fn parse_lexer<'source>(
.with_span(lexer.span(), source)?;
let value = match token {
Token::Bool => Value::Bool(lexer.slice().parse().with_span(lexer.span(), source)?),
Token::Integer => Value::Int(lexer.slice().parse().with_span(lexer.span(), source)?),
Token::Integer => Value::Int(parse_int(lexer.slice()).with_span(lexer.span(), source)?),
Token::Float => Value::Float(lexer.slice().parse().with_span(lexer.span(), source)?),
Token::LiteralString => {
Value::String(parse_string(lexer.slice()).with_span(lexer.span(), source)?)
@ -72,6 +73,18 @@ fn parse_string(literal: &str) -> Result<String, UnescapeError> {
}
}
/// Parse an integer literal into an `i64`.
///
/// Accepted forms:
/// - an optional leading `-`
/// - `_` digit separators, which are removed before parsing
/// - `0x` / `0X` prefix: hexadecimal
/// - `0b` / `0B` prefix: binary
/// - a leading `0` followed by at least one more character: octal
/// - anything else: decimal
///
/// The sign is split off before the radix prefix is inspected, so negative
/// prefixed literals such as `-0x1A` or `-0432` use the correct radix.
///
/// # Errors
///
/// Returns the `ParseIntError` produced by `i64::from_str_radix` when the
/// digits are empty, invalid for the detected radix, or out of range for `i64`.
fn parse_int(literal: &str) -> Result<i64, ParseIntError> {
    let stripped = literal.replace('_', "");

    // Split off the sign first so the radix prefix is always looked up at the
    // start of the magnitude.
    let (sign, magnitude) = match stripped.strip_prefix('-') {
        Some(rest) => ("-", rest),
        None => ("", stripped.as_str()),
    };

    let (radix, digits) = if let Some(hex) = magnitude
        .strip_prefix("0x")
        .or_else(|| magnitude.strip_prefix("0X"))
    {
        (16, hex)
    } else if let Some(bin) = magnitude
        .strip_prefix("0b")
        .or_else(|| magnitude.strip_prefix("0B"))
    {
        (2, bin)
    } else if magnitude.len() > 1 && magnitude.starts_with('0') {
        // A bare "0" stays decimal; "0" followed by more digits is octal.
        (8, &magnitude[1..])
    } else {
        (10, magnitude)
    };

    // Re-attach the sign before parsing (instead of negating afterwards) so
    // that `i64::MIN`, whose magnitude does not fit in an `i64`, still parses.
    i64::from_str_radix(&format!("{}{}", sign, digits), radix)
}
#[derive(Default)]
struct ArrayBuilder {
next_int_key: i64,
@ -264,4 +277,9 @@ fn test_parse() {
}),
parse(r#"["foo" => true, "nested" => ['foo' => null]]"#).unwrap()
);
assert_eq!(Value::Int(-432), parse(r#"-432"#).unwrap());
assert_eq!(Value::Int(282), parse(r#"0432"#).unwrap());
assert_eq!(Value::Int(26), parse(r#"0x1A"#).unwrap());
assert_eq!(Value::Int(3), parse(r#"0b11"#).unwrap());
assert_eq!(Value::Int(12345), parse(r#"12_34_5"#).unwrap());
}