more complete float parsing

This commit is contained in:
Robin Appelman 2020-12-07 21:35:14 +01:00
commit 2227135357
2 changed files with 39 additions and 3 deletions

View file

@ -22,7 +22,7 @@ pub enum Token {
Comma, Comma,
#[regex("(\"([^\"\\\\]|\\\\.)*\")|(\'([^\'\\\\]|\\\\.)*\')")] #[regex("(\"([^\"\\\\]|\\\\.)*\")|(\'([^\'\\\\]|\\\\.)*\')")]
LiteralString, LiteralString,
#[regex("-?[0-9]*\\.[0-9]+")] #[regex("-?((([0-9]+(_[0-9]+)*|([0-9]*(_[0-9]+)*[\\.][0-9]+(_[0-9]+)*)|([0-9]+(_[0-9]+)*[\\.][0-9]*(_[0-9]+)*)))[eE][+-]?[0-9]+(_[0-9]+)*|([0-9]*(_[0-9]+)*[\\.][0-9]+(_[0-9]+)*)|([0-9]+(_[0-9]+)*[\\.][0-9]*(_[0-9]+)*))")]
Float, Float,
#[regex("-?(0|[1-9][0-9]*(_[0-9]+)*|0[xX][0-9a-fA-F]+(_[0-9a-fA-F]+)*|0[0-7]+(_[0-7]+)*|0[bB][01]+(_[01]+)*)")] #[regex("-?(0|[1-9][0-9]*(_[0-9]+)*|0[xX][0-9a-fA-F]+(_[0-9a-fA-F]+)*|0[0-7]+(_[0-7]+)*|0[bB][01]+(_[01]+)*)")]
Integer, Integer,
@ -141,3 +141,27 @@ fn test_lex_int() {
assert_eq!(lex.next(), Some(Token::Integer)); assert_eq!(lex.next(), Some(Token::Integer));
assert_eq!(lex.next(), None); assert_eq!(lex.next(), None);
} }
#[test]
fn test_lex_float() {
    // Six float literals separated by commas: leading-dot, plain decimal,
    // exponent forms (unsigned, `+`, `-`) and one with a `_` digit separator.
    let mut lex = Token::lexer(".1,123.0,123e1,123e+1,123e-1,1_23.456");

    // The lexer must yield the tokens in exactly this order.
    let expected_tokens = vec![
        Token::Float,
        Token::Comma,
        Token::Float,
        Token::Comma,
        Token::Float,
        Token::Comma,
        Token::Float,
        Token::Comma,
        Token::Float,
        Token::Comma,
        Token::Float,
    ];
    for expected in expected_tokens {
        assert_eq!(lex.next(), Some(expected));
    }

    // Nothing may be left over once the last literal has been consumed.
    assert_eq!(lex.next(), None);
}

View file

@ -5,7 +5,7 @@ use crate::string::{unescape_double, unescape_single, UnescapeError};
use crate::{Key, Value}; use crate::{Key, Value};
use logos::{Lexer, Logos}; use logos::{Lexer, Logos};
use std::collections::HashMap; use std::collections::HashMap;
use std::num::ParseIntError; use std::num::{ParseFloatError, ParseIntError};
/// Parse a php literal /// Parse a php literal
/// ///
@ -55,7 +55,7 @@ pub fn parse_lexer<'source>(
.with_span(lexer.span(), source)?, .with_span(lexer.span(), source)?,
), ),
Token::Integer => Value::Int(parse_int(lexer.slice()).with_span(lexer.span(), source)?), Token::Integer => Value::Int(parse_int(lexer.slice()).with_span(lexer.span(), source)?),
Token::Float => Value::Float(lexer.slice().parse().with_span(lexer.span(), source)?), Token::Float => Value::Float(parse_float(lexer.slice()).with_span(lexer.span(), source)?),
Token::LiteralString => { Token::LiteralString => {
Value::String(parse_string(lexer.slice()).with_span(lexer.span(), source)?) Value::String(parse_string(lexer.slice()).with_span(lexer.span(), source)?)
} }
@ -91,6 +91,11 @@ fn parse_int(literal: &str) -> Result<i64, ParseIntError> {
} }
} }
/// Parse a float literal, ignoring any `_` digit separators.
///
/// Every underscore in `literal` is dropped and the remaining text is parsed
/// as an `f64`; the parse error, if any, is returned unchanged.
fn parse_float(literal: &str) -> Result<f64, ParseFloatError> {
    let digits: String = literal.chars().filter(|&c| c != '_').collect();
    digits.parse::<f64>()
}
#[derive(Default)] #[derive(Default)]
struct ArrayBuilder { struct ArrayBuilder {
next_int_key: i64, next_int_key: i64,
@ -288,5 +293,12 @@ fn test_parse() {
assert_eq!(Value::Int(26), parse(r#"0x1A"#).unwrap()); assert_eq!(Value::Int(26), parse(r#"0x1A"#).unwrap());
assert_eq!(Value::Int(3), parse(r#"0b11"#).unwrap()); assert_eq!(Value::Int(3), parse(r#"0b11"#).unwrap());
assert_eq!(Value::Int(12345), parse(r#"12_34_5"#).unwrap()); assert_eq!(Value::Int(12345), parse(r#"12_34_5"#).unwrap());
assert_eq!(Value::Bool(true), parse(r#"True"#).unwrap()); assert_eq!(Value::Bool(true), parse(r#"True"#).unwrap());
assert_eq!(Value::Float(-432.0), parse(r#"-432.0"#).unwrap());
assert_eq!(Value::Float(0.12), parse(r#".12"#).unwrap());
assert_eq!(Value::Float(1000.0), parse(r#"10e2"#).unwrap());
assert_eq!(Value::Float(1.0), parse(r#"10e-1"#).unwrap());
assert_eq!(Value::Float(1234.5), parse(r#"12_34.5"#).unwrap());
} }