finish serde support

This commit is contained in:
Robin Appelman 2020-12-14 00:09:00 +01:00
commit cf2fda0c8c
9 changed files with 717 additions and 421 deletions

View file

@ -1,7 +1,7 @@
[package]
name = "php-literal-parser"
description = "parser for php literals"
version = "0.1.2"
version = "0.2.0"
authors = ["Robin Appelman <robin@icewind.nl>"]
edition = "2018"
license = "MIT OR Apache-2.0"

View file

@ -4,14 +4,40 @@ parser for php literals.
## Usage
Parse into a generic representation
```rust
use php_literal_parser::parse;
use std::error::Error;
use php_literal_parser::{from_str, Value, ParseError};
fn main() -> Result<(), ParseError> {
let map = from_str::<Value>(r#"["foo" => true, "nested" => ['foo' => false]]"#)?;
fn main() -> Result<(), Box<dyn Error>> {
let map = parse(r#"["foo" => true, "nested" => ['foo' => false]]"#)?;
assert_eq!(map["foo"], true);
assert_eq!(map["nested"]["foo"], false);
Ok(())
}
```
Or parse into a specific struct using serde
```rust
use php_literal_parser::{from_str, ParseError};
use serde::Deserialize;
#[derive(Debug, Deserialize, PartialEq)]
struct Target {
foo: bool,
bars: Vec<u8>
}
fn main() -> Result<(), ParseError> {
let target = from_str(r#"["foo" => true, "bars" => [1, 2, 3, 4,]]"#)?;
assert_eq!(Target {
foo: true,
bars: vec![1, 2, 3, 4]
}, target);
Ok(())
}
```

View file

@ -1,4 +1,4 @@
use php_literal_parser::parse;
use php_literal_parser::{from_str, Value};
fn main() {
let source = r###"
@ -17,7 +17,7 @@ fn main() {
)
"###;
match parse(source) {
match from_str::<Value>(source) {
Ok(result) => print!("{:#?}", result),
Err(err) => eprint!("{}", err.with_source(source)),
}

View file

@ -29,21 +29,36 @@ use thiserror::Error;
/// ```
///
#[derive(Debug)]
pub struct SpannedError<T: Error + Debug> {
span: Span,
error: T,
pub struct ParseError {
span: Option<Span>,
error: RawParseError,
}
impl<T: Error + Debug> SpannedError<T> {
pub fn new(error: T, span: Span) -> Self {
SpannedError { span, error }
impl serde::de::Error for ParseError {
fn custom<T>(msg: T) -> Self
where
T: Display,
{
ParseError {
span: None,
error: RawParseError::custom(msg),
}
}
}
pub fn error(&self) -> &T {
impl ParseError {
pub fn new(error: RawParseError, span: Span) -> Self {
ParseError {
span: Some(span),
error,
}
}
pub fn error(&self) -> &RawParseError {
&self.error
}
pub fn with_source(self, source: &str) -> SourceSpannedError<T> {
pub fn with_source(self, source: &str) -> SourceSpannedError {
SourceSpannedError {
span: self.span,
error: self.error,
@ -52,36 +67,50 @@ impl<T: Error + Debug> SpannedError<T> {
}
}
impl<T: Error + Debug + 'static> Error for SpannedError<T> {
impl Error for ParseError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
Some(&self.error)
}
}
impl<T: Error + Debug> Display for SpannedError<T> {
impl Display for ParseError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
<T as Display>::fmt(&self.error, f)
<RawParseError as Display>::fmt(&self.error, f)
}
}
pub struct SourceSpannedError<'source, T> {
span: Span,
error: T,
impl From<RawParseError> for ParseError {
fn from(err: RawParseError) -> Self {
ParseError {
span: None,
error: err,
}
}
}
pub struct SourceSpannedError<'source> {
span: Option<Span>,
error: RawParseError,
source: &'source str,
}
impl<'source, T: Error + Debug + 'static> SourceSpannedError<'source, T> {
pub fn spanned(self) -> SpannedError<T> {
SpannedError::new(self.error, self.span)
impl<'source> SourceSpannedError<'source> {
pub fn into_inner(self) -> ParseError {
ParseError {
span: self.span,
error: self.error,
}
}
}
const METRICS: DefaultMetrics = DefaultMetrics::with_tab_stop(4);
impl<'source, T: Error + Debug> Display for SourceSpannedError<'source, T> {
impl<'source> Display for SourceSpannedError<'source> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let start = get_position(self.source, self.span.start);
let end = get_position(self.source, self.span.end);
match self.span.as_ref() {
Some(span) => {
let start = get_position(self.source, span.start);
let end = get_position(self.source, span.end);
let span = SourceSpan::new(start, end, end.next_line());
let mut fmt = Formatter::with_margin_color(Color::Blue);
@ -103,6 +132,9 @@ impl<'source, T: Error + Debug> Display for SourceSpannedError<'source, T> {
)
.unwrap();
write!(f, "{}", formatted)?;
}
None => write!(f, "{}", self.error)?,
}
Ok(())
}
}
@ -117,7 +149,7 @@ fn get_position(text: &str, index: usize) -> Position {
}
#[derive(Error, Debug)]
pub enum ParseError {
pub enum RawParseError {
#[error("{0}")]
UnexpectedToken(#[from] UnexpectedTokenError),
#[error("Invalid boolean literal: {0}")]
@ -132,11 +164,22 @@ pub enum ParseError {
UnexpectedArrayKey,
#[error("Trailing characters after parsing")]
TrailingCharacters,
#[error("{0}")]
Custom(String),
}
impl From<UnescapeError> for ParseError {
impl serde::de::Error for RawParseError {
    /// Store the rendered message in the `Custom` variant.
    fn custom<T: Display>(msg: T) -> Self {
        let message = msg.to_string();
        RawParseError::Custom(message)
    }
}
impl From<UnescapeError> for RawParseError {
fn from(_: UnescapeError) -> Self {
ParseError::InvalidStringLiteral
RawParseError::InvalidStringLiteral
}
}
@ -180,17 +223,11 @@ impl Display for UnexpectedTokenError {
impl Error for UnexpectedTokenError {}
pub trait ExpectToken<'source> {
fn expect_token(
self,
expected: &[Token],
) -> Result<SpannedToken<'source>, SpannedError<ParseError>>;
fn expect_token(self, expected: &[Token]) -> Result<SpannedToken<'source>, ParseError>;
}
impl<'source> ExpectToken<'source> for Option<SpannedToken<'source>> {
fn expect_token(
self,
expected: &[Token],
) -> Result<SpannedToken<'source>, SpannedError<ParseError>> {
fn expect_token(self, expected: &[Token]) -> Result<SpannedToken<'source>, ParseError> {
self.ok_or_else(|| UnexpectedTokenError {
expected: expected.to_vec(),
found: None,
@ -201,10 +238,7 @@ impl<'source> ExpectToken<'source> for Option<SpannedToken<'source>> {
}
impl<'a, 'source> ExpectToken<'source> for Option<&'a SpannedToken<'source>> {
fn expect_token(
self,
expected: &[Token],
) -> Result<SpannedToken<'source>, SpannedError<ParseError>> {
fn expect_token(self, expected: &[Token]) -> Result<SpannedToken<'source>, ParseError> {
self.ok_or_else(|| UnexpectedTokenError {
expected: expected.to_vec(),
found: None,
@ -215,10 +249,7 @@ impl<'a, 'source> ExpectToken<'source> for Option<&'a SpannedToken<'source>> {
}
impl<'source> ExpectToken<'source> for SpannedToken<'source> {
fn expect_token(
self,
expected: &[Token],
) -> Result<SpannedToken<'source>, SpannedError<ParseError>> {
fn expect_token(self, expected: &[Token]) -> Result<SpannedToken<'source>, ParseError> {
if expected.iter().any(|expect| self.token.eq(expect)) {
Ok(self)
} else {
@ -231,14 +262,14 @@ impl<'source> ExpectToken<'source> for SpannedToken<'source> {
}
}
pub trait ResultExt<T, E: Error + Debug> {
fn with_span(self, span: Span) -> Result<T, SpannedError<E>>;
pub trait ResultExt<T> {
fn with_span(self, span: Span) -> Result<T, ParseError>;
}
impl<T, E: Into<ParseError>> ResultExt<T, ParseError> for Result<T, E> {
fn with_span(self, span: Span) -> Result<T, SpannedError<ParseError>> {
self.map_err(|error| SpannedError {
span,
impl<T, E: Into<RawParseError>> ResultExt<T> for Result<T, E> {
fn with_span(self, span: Span) -> Result<T, ParseError> {
self.map_err(|error| ParseError {
span: Some(span),
error: error.into(),
})
}

View file

@ -1,6 +1,8 @@
use logos::{Lexer, Logos, Span};
use serde::export::fmt::Debug;
use serde::export::Formatter;
#[derive(Logos, Debug, PartialEq, Clone)]
#[derive(Logos, Debug, PartialEq, Clone, Copy)]
pub enum Token {
#[token("array")]
Array,
@ -166,7 +168,7 @@ fn test_lex_float() {
assert_eq!(lex.next(), None);
}
#[derive(Clone, Debug)]
#[derive(Clone)]
pub struct SpannedToken<'source> {
pub token: Token,
pub span: Span,
@ -179,6 +181,17 @@ impl<'source> SpannedToken<'source> {
}
}
impl<'source> Debug for SpannedToken<'source> {
    /// Compact debug rendering: the token kind followed by the text returned by
    /// `slice()`, e.g. `SpannedToken { Bool: "true" }`. The span itself is not printed.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // The space belongs before the closing brace; previously it was emitted
        // after it, leaving a stray trailing space on every rendering.
        write!(
            f,
            "SpannedToken {{ {:?}: \"{}\" }}",
            self.token,
            self.slice()
        )
    }
}
pub struct TokenStream<'source> {
lexer: Lexer<'source, Token>,
}

View file

@ -2,15 +2,17 @@
//!
//! Allows parsing of php string, bool, number and array literals.
//!
//! ## Example
//! ## Usage
//!
//! Parse into a generic representation
//!
//! ```rust
//! use php_literal_parser::{parse, Value, Key};
//! use php_literal_parser::{from_str, Value};
//! # use std::fmt::Debug;
//! # use std::error::Error;
//!
//! # fn main() -> Result<(), Box<dyn Error>> {
//! let map = parse(r#"["foo" => true, "nested" => ['foo' => false]]"#)?;
//! let map = from_str::<Value>(r#"["foo" => true, "nested" => ['foo' => false]]"#)?;
//!
//! assert_eq!(map["foo"], true);
//! assert_eq!(map["nested"]["foo"], false);
@ -18,18 +20,48 @@
//! # }
//! ```
//!
//! Or parse into a specific struct using serde
//!
//! ```rust
//! use php_literal_parser::from_str;
//! # use serde_derive::Deserialize;
//! use serde::Deserialize;
//! # use std::fmt::Debug;
//! # use std::error::Error;
//!
//! #[derive(Debug, Deserialize, PartialEq)]
//! struct Target {
//! foo: bool,
//! bars: Vec<u8>
//! }
//!
//! # fn main() -> Result<(), Box<dyn Error>> {
//! let target = from_str(r#"["foo" => true, "bars" => [1, 2, 3, 4,]]"#)?;
//!
//! assert_eq!(Target {
//! foo: true,
//! bars: vec![1,2,3,4]
//! }, target);
//! # Ok(())
//! # }
//! ```
//!
#![forbid(unsafe_code)]
mod error;
mod lexer;
mod num;
mod parser;
mod serde;
mod serde_impl;
mod string;
pub use error::{ParseError, SpannedError};
pub use parser::parse;
use crate::string::is_array_key_numeric;
pub use error::{ParseError, RawParseError};
use serde::de::{self, MapAccess, SeqAccess, Visitor};
use serde::{Deserialize, Deserializer};
pub use serde_impl::from_str;
use std::borrow::Borrow;
use std::collections::HashMap;
use std::convert::TryInto;
use std::fmt::{Display, Formatter};
use std::hash::{Hash, Hasher};
use std::ops::Index;
@ -369,3 +401,298 @@ fn test_index() {
assert_eq!(map[1], true);
assert_eq!(map[Key::Int(1)], true);
}
struct ValueVisitor;
impl<'de> Visitor<'de> for ValueVisitor {
type Value = Value;
fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
formatter.write_str("any php literal")
}
fn visit_bool<E>(self, v: bool) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::Bool(v))
}
fn visit_i8<E>(self, v: i8) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::Int(v.into()))
}
fn visit_i16<E>(self, v: i16) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::Int(v.into()))
}
fn visit_i32<E>(self, v: i32) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::Int(v.into()))
}
fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::Int(v))
}
fn visit_u8<E>(self, v: u8) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::Int(v.into()))
}
fn visit_u16<E>(self, v: u16) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::Int(v.into()))
}
fn visit_u32<E>(self, v: u32) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::Int(v.into()))
}
fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::Int(v.try_into().map_err(|_| {
E::custom(format!("i64 out of range: {}", v))
})?))
}
fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::Float(v.into()))
}
fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::Float(v))
}
fn visit_char<E>(self, v: char) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::String(v.into()))
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::String(v.into()))
}
fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::String(v.into()))
}
fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::String(v))
}
fn visit_unit<E>(self) -> Result<Self::Value, E>
where
E: de::Error,
{
Ok(Value::Null)
}
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, <A as SeqAccess<'de>>::Error>
where
A: SeqAccess<'de>,
{
let mut result = HashMap::new();
let mut next_key = 0;
while let Some(value) = seq.next_element::<Value>()? {
let key = Key::Int(next_key);
next_key += 1;
result.insert(key, value);
}
Ok(Value::Array(result))
}
fn visit_map<A>(self, mut map: A) -> Result<Self::Value, <A as MapAccess<'de>>::Error>
where
A: MapAccess<'de>,
{
let mut result = HashMap::new();
while let Some((key, value)) = map.next_entry()? {
result.insert(key, value);
}
Ok(Value::Array(result))
}
}
impl<'de> Deserialize<'de> for Value {
fn deserialize<D>(deserializer: D) -> Result<Value, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_any(ValueVisitor)
}
}
/// Serde visitor that builds an array [`Key`] from a scalar.
///
/// Booleans, numbers and the unit value are coerced into integer or string keys;
/// strings are turned into integer keys when `is_array_key_numeric` accepts them.
struct KeyVisitor;

impl<'de> Visitor<'de> for KeyVisitor {
    type Value = Key;

    fn expecting(&self, formatter: &mut Formatter) -> std::fmt::Result {
        formatter.write_str("a string, number, bool or null")
    }

    /// `true` becomes key `1`, `false` becomes key `0`.
    fn visit_bool<E>(self, v: bool) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        Ok(Key::Int(if v { 1 } else { 0 }))
    }

    fn visit_i8<E>(self, v: i8) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        Ok(Key::Int(v.into()))
    }

    fn visit_i16<E>(self, v: i16) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        Ok(Key::Int(v.into()))
    }

    fn visit_i32<E>(self, v: i32) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        Ok(Key::Int(v.into()))
    }

    fn visit_i64<E>(self, v: i64) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        Ok(Key::Int(v))
    }

    fn visit_u8<E>(self, v: u8) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        Ok(Key::Int(v.into()))
    }

    fn visit_u16<E>(self, v: u16) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        Ok(Key::Int(v.into()))
    }

    fn visit_u32<E>(self, v: u32) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        Ok(Key::Int(v.into()))
    }

    /// Values above `i64::MAX` are rejected with a custom error instead of wrapping.
    fn visit_u64<E>(self, v: u64) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        Ok(Key::Int(v.try_into().map_err(|_| {
            E::custom(format!("i64 out of range: {}", v))
        })?))
    }

    /// Float keys are converted with an `as i64` cast, dropping the fractional part.
    fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        Ok(Key::Int(v as i64))
    }

    /// Float keys are converted with an `as i64` cast, dropping the fractional part.
    fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        Ok(Key::Int(v as i64))
    }

    fn visit_char<E>(self, v: char) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        Ok(Key::String(v.into()))
    }

    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        self.visit_string(v.into())
    }

    fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        self.visit_string(v.into())
    }

    /// Strings accepted by `is_array_key_numeric` become integer keys; everything
    /// else stays a string key.
    fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        if is_array_key_numeric(&v) {
            // An integer-looking key that does not fit in i64 must not panic on
            // input data; like php, keep it as a string key instead.
            if let Ok(int) = v.parse::<i64>() {
                return Ok(Key::Int(int));
            }
        }
        Ok(Key::String(v))
    }

    /// The unit value is used as the empty-string key.
    fn visit_unit<E>(self) -> Result<Self::Value, E>
    where
        E: de::Error,
    {
        Ok(Key::String(String::from("")))
    }
}
impl<'de> Deserialize<'de> for Key {
fn deserialize<D>(deserializer: D) -> Result<Key, D::Error>
where
D: Deserializer<'de>,
{
deserializer.deserialize_any(KeyVisitor)
}
}

View file

@ -1,35 +1,12 @@
use crate::error::{ExpectToken, ParseError, ResultExt, SpannedError};
use crate::error::{ExpectToken, ParseError, ResultExt};
use crate::lexer::{SpannedToken, Token, TokenStream};
use crate::num::parse_int;
use crate::string::{is_array_key_numeric, parse_string};
use crate::{Key, Value};
use logos::Logos;
use std::collections::HashMap;
use std::iter::Peekable;
use std::num::ParseFloatError;
/// Parse a php literal
///
/// ## Example
///
/// ```rust
/// use php_literal_parser::{parse, Value, Key};
/// # use std::fmt::Debug;
/// # use std::error::Error;
///
/// # fn main() -> Result<(), Box<dyn Error>> {
/// let map = parse(r#"["foo" => true, "nested" => ['foo' => false]]"#)?;
///
/// assert_eq!(map["foo"], true);
/// assert_eq!(map["nested"]["foo"], false);
/// # Ok(())
/// # }
/// ```
///
pub fn parse(source: &str) -> Result<Value, SpannedError<ParseError>> {
Parser::new(source).run()
}
pub struct Parser<'source> {
tokens: Peekable<TokenStream<'source>>,
}
@ -45,39 +22,20 @@ impl<'source> Parser<'source> {
self.tokens.next()
}
pub fn eat_token(&mut self) {
let _ = self.tokens.next();
}
pub fn run(&mut self) -> Result<Value, SpannedError<ParseError>> {
let token = self.tokens.next().expect_token(&[
Token::Bool,
Token::Integer,
Token::Float,
Token::LiteralString,
Token::Null,
Token::Array,
Token::SquareOpen,
])?;
self.parse_any(token)
}
pub fn parse_any(&mut self, token: SpannedToken) -> Result<Value, SpannedError<ParseError>> {
pub fn parse_literal(&self, token: SpannedToken) -> Result<Value, ParseError> {
let value = match token.token {
Token::Bool => Value::Bool(self.parse_bool_token(token)?),
Token::Integer => Value::Int(self.parse_int_token(token)?),
Token::Float => Value::Float(self.parse_float_token(token)?),
Token::LiteralString => Value::String(self.parse_string_token(token)?),
Token::Null => Value::Null,
Token::Array => Value::Array(self.parse_array(ArraySyntax::Long)?),
Token::SquareOpen => Value::Array(self.parse_array(ArraySyntax::Short)?),
_ => unreachable!(),
};
Ok(value)
}
pub fn parse_bool_token(&self, token: SpannedToken) -> Result<bool, SpannedError<ParseError>> {
pub fn parse_bool_token(&self, token: SpannedToken) -> Result<bool, ParseError> {
token
.slice()
.to_ascii_lowercase()
@ -85,99 +43,19 @@ impl<'source> Parser<'source> {
.with_span(token.span)
}
pub fn parse_int_token(&self, token: SpannedToken) -> Result<i64, SpannedError<ParseError>> {
pub fn parse_int_token(&self, token: SpannedToken) -> Result<i64, ParseError> {
parse_int(token.slice()).with_span(token.span)
}
pub fn parse_float_token(&self, token: SpannedToken) -> Result<f64, SpannedError<ParseError>> {
pub fn parse_float_token(&self, token: SpannedToken) -> Result<f64, ParseError> {
parse_float(token.slice()).with_span(token.span)
}
pub fn parse_string_token(
&self,
token: SpannedToken,
) -> Result<String, SpannedError<ParseError>> {
pub fn parse_string_token(&self, token: SpannedToken) -> Result<String, ParseError> {
parse_string(token.slice()).with_span(token.span)
}
pub fn parse_array(
&mut self,
syntax: ArraySyntax,
) -> Result<HashMap<Key, Value>, SpannedError<ParseError>> {
let mut builder = ArrayBuilder::default();
if syntax == ArraySyntax::Long {
self.tokens.next().expect_token(&[Token::BracketOpen])?;
}
loop {
let key_value_or_close_token = self.tokens.next().expect_token(&[
Token::Bool,
Token::Integer,
Token::Float,
Token::LiteralString,
Token::Null,
Token::Array,
Token::SquareOpen,
syntax.close_bracket(),
])?;
// trailing comma or empty array
if key_value_or_close_token.token == syntax.close_bracket() {
break;
}
let next = self.tokens.next().expect_token(&[
syntax.close_bracket(),
Token::Comma,
Token::Arrow,
])?;
match next.token {
Token::BracketClose => {
builder.push_value(self.parse_any(key_value_or_close_token)?);
break;
}
Token::SquareClose => {
builder.push_value(self.parse_any(key_value_or_close_token)?);
break;
}
Token::Comma => {
builder.push_value(self.parse_any(key_value_or_close_token)?);
}
Token::Arrow => {
let key = self.parse_array_key(key_value_or_close_token)?;
let value = self.run()?;
builder.push_key_value(key, value);
match self
.tokens
.next()
.expect_token(&[syntax.close_bracket(), Token::Comma])?
.token
{
Token::BracketClose => {
break;
}
Token::SquareClose => {
break;
}
Token::Comma => {}
_ => unreachable!(),
}
}
_ => {
unreachable!();
}
}
}
Ok(builder.data)
}
pub fn parse_array_key(
&mut self,
token: SpannedToken,
) -> Result<Key, SpannedError<ParseError>> {
pub fn parse_array_key(&self, token: SpannedToken) -> Result<Key, ParseError> {
let token = token.expect_token(&[
Token::Bool,
Token::Integer,
@ -185,7 +63,7 @@ impl<'source> Parser<'source> {
Token::LiteralString,
Token::Null,
])?;
Ok(match self.parse_any(token)? {
Ok(match self.parse_literal(token)? {
Value::Int(int) => Key::Int(int),
Value::Float(float) => Key::Int(float as i64),
Value::String(str) if is_array_key_numeric(&str) => Key::Int(parse_int(&str).unwrap()),
@ -202,27 +80,6 @@ fn parse_float(literal: &str) -> Result<f64, ParseFloatError> {
stripped.parse()
}
#[derive(Default)]
struct ArrayBuilder {
data: HashMap<Key, Value>,
next_int_key: i64,
}
impl ArrayBuilder {
fn push_value(&mut self, value: Value) {
let key = Key::Int(self.next_int_key);
self.next_int_key += 1;
self.data.insert(key, value);
}
fn push_key_value(&mut self, key: Key, value: Value) {
if let Key::Int(int) = &key {
self.next_int_key = int + 1;
}
self.data.insert(key, value);
}
}
#[derive(Eq, PartialEq, Copy, Clone)]
pub enum ArraySyntax {
Short,
@ -237,101 +94,3 @@ impl ArraySyntax {
}
}
}
#[test]
fn test_parse() {
use maplit::hashmap;
assert_eq!(Value::Bool(true), parse("true").unwrap());
assert_eq!(Value::Bool(false), parse("false").unwrap());
assert_eq!(Value::Int(12), parse("12").unwrap());
assert_eq!(Value::Int(-1), parse("-1").unwrap());
assert_eq!(Value::Float(1.12), parse("1.12").unwrap());
assert_eq!(
Value::String("test".to_string()),
parse(r#""test""#).unwrap()
);
assert_eq!(Value::Array(hashmap! {}), parse(r#"array()"#).unwrap());
assert_eq!(
Value::Array(hashmap! {
Key::Int(0) => Value::Int(3),
Key::Int(1) => Value::Int(4),
Key::Int(2) => Value::Int(5),
}),
parse(r#"array(3,4,5)"#).unwrap()
);
assert_eq!(
Value::Array(hashmap! {
Key::Int(0) => Value::Int(3),
Key::Int(1) => Value::Int(4),
Key::Int(2) => Value::Int(5),
}),
parse(r#"array(3,4,5,)"#).unwrap()
);
assert_eq!(
Value::Array(hashmap! {
Key::Int(1) => Value::Int(3),
Key::Int(3) => Value::Int(4),
Key::Int(5) => Value::Int(5),
}),
parse(r#"array(1=>3,3=>4,5=>5)"#).unwrap()
);
assert_eq!(
Value::Array(hashmap! {
Key::Int(1) => Value::Int(3),
Key::Int(2) => Value::Int(4),
Key::Int(3) => Value::Int(5),
}),
parse(r#"array(1=>3,4,5)"#).unwrap()
);
assert_eq!(
Value::Array(hashmap! {
Key::Int(1) => Value::Int(3),
Key::String("foo".into()) => Value::Int(4),
Key::Int(2) => Value::Int(5),
}),
parse(r#"array(1=>3,"foo" => 4,5)"#).unwrap()
);
assert_eq!(
Value::Array(hashmap! {
Key::String("foo".into()) => Value::Bool(true),
Key::String("nested".into()) => Value::Array(hashmap! {
Key::String("foo".into()) => Value::Bool(false),
}),
}),
parse(r#"array("foo" => true, "nested" => array ('foo' => false))"#).unwrap()
);
assert_eq!(
Value::Array(hashmap! {
Key::String("foo".into()) => Value::Bool(true),
Key::String("nested".into()) => Value::Array(hashmap! {
Key::String("foo".into()) => Value::Null,
}),
}),
parse(r#"["foo" => true, "nested" => ['foo' => null]]"#).unwrap()
);
assert_eq!(Value::Int(-432), parse(r#"-432"#).unwrap());
assert_eq!(Value::Int(282), parse(r#"0432"#).unwrap());
assert_eq!(Value::Int(26), parse(r#"0x1A"#).unwrap());
assert_eq!(Value::Int(3), parse(r#"0b11"#).unwrap());
assert_eq!(Value::Int(12345), parse(r#"12_34_5"#).unwrap());
assert_eq!(Value::Bool(true), parse(r#"True"#).unwrap());
assert_eq!(Value::Float(-432.0), parse(r#"-432.0"#).unwrap());
assert_eq!(Value::Float(0.12), parse(r#".12"#).unwrap());
assert_eq!(Value::Float(1000.0), parse(r#"10e2"#).unwrap());
assert_eq!(Value::Float(1.0), parse(r#"10e-1"#).unwrap());
assert_eq!(Value::Float(1234.5), parse(r#"12_34.5"#).unwrap());
assert_eq!(
Value::Array(hashmap! {
Key::Int(2) => Value::Int(3),
Key::String("foo".into()) => Value::Int(4),
Key::String("".into()) => Value::Int(5),
Key::Int(1) => Value::Int(6),
Key::Int(0) => Value::Int(7),
}),
parse(r#"array("2"=>3,"foo" => 4, null => 5, true => 6, false => 7)"#).unwrap()
);
}

View file

@ -8,75 +8,44 @@ use crate::error::{ExpectToken, ResultExt};
use crate::lexer::{SpannedToken, Token};
use crate::num::ParseIntError;
use crate::parser::{ArraySyntax, Parser};
use crate::{Key, ParseError, SpannedError};
use crate::{Key, ParseError, RawParseError};
use serde::export::TryFrom;
use std::error::Error;
use std::fmt::{Display, Formatter};
use std::collections::VecDeque;
#[derive(Debug)]
pub enum SerdeParseError {
Parse(ParseError),
Spanned(SpannedError<ParseError>),
Custom(String),
}
impl From<SpannedError<ParseError>> for SerdeParseError {
fn from(err: SpannedError<ParseError>) -> Self {
SerdeParseError::Spanned(err)
}
}
impl From<ParseError> for SerdeParseError {
fn from(err: ParseError) -> Self {
SerdeParseError::Parse(err)
}
}
impl Display for SerdeParseError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
SerdeParseError::Spanned(err) => write!(f, "{}", err),
SerdeParseError::Parse(err) => write!(f, "{}", err),
SerdeParseError::Custom(err) => write!(f, "{}", err),
}
}
}
impl Error for SerdeParseError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
SerdeParseError::Spanned(err) => Some(err),
SerdeParseError::Parse(err) => Some(err),
SerdeParseError::Custom(_) => None,
}
}
}
impl serde::de::Error for SerdeParseError {
fn custom<T>(msg: T) -> Self
where
T: Display,
{
SerdeParseError::Custom(msg.to_string())
}
}
type Result<T> = std::result::Result<T, SerdeParseError>;
type Result<T> = std::result::Result<T, ParseError>;
pub struct Deserializer<'de> {
parser: Parser<'de>,
peeked: Option<SpannedToken<'de>>,
peeked: VecDeque<SpannedToken<'de>>,
}
impl<'de> Deserializer<'de> {
pub fn from_str(input: &'de str) -> Self {
Deserializer {
parser: Parser::new(input),
peeked: None,
peeked: Default::default(),
}
}
}
/// Parse a php literal
///
/// ## Example
///
/// ```rust
/// use php_literal_parser::{from_str, Value, Key};
/// # use std::fmt::Debug;
/// # use std::error::Error;
///
/// # fn main() -> Result<(), Box<dyn Error>> {
/// let map = from_str::<Value>(r#"["foo" => true, "nested" => ['foo' => false]]"#)?;
///
/// assert_eq!(map["foo"], true);
/// assert_eq!(map["nested"]["foo"], false);
/// # Ok(())
/// # }
/// ```
///
pub fn from_str<'a, T>(s: &'a str) -> Result<T>
where
T: Deserialize<'a>,
@ -86,26 +55,25 @@ where
if deserializer.next_token().is_none() {
Ok(t)
} else {
Err(ParseError::TrailingCharacters.into())
Err(RawParseError::TrailingCharacters.into())
}
}
impl<'de> Deserializer<'de> {
fn next_token(&mut self) -> Option<SpannedToken<'de>> {
self.peeked.take().or_else(|| self.parser.next_token())
self.peeked.pop_front().or_else(|| self.parser.next_token())
}
fn peek_token(&mut self) -> Option<&SpannedToken<'de>> {
if self.peeked.is_none() {
self.peeked = self.next_token()
if self.peeked.is_empty() {
let next = self.next_token()?;
self.peeked.push_back(next)
}
self.peeked.as_ref()
self.peeked.front()
}
fn eat_token(&mut self) {
if self.peeked.take().is_none() {
self.parser.eat_token()
}
let _ = self.next_token();
}
fn parse_bool(&mut self) -> Result<bool> {
@ -113,8 +81,8 @@ impl<'de> Deserializer<'de> {
Ok(self.parser.parse_bool_token(token)?)
}
fn set_peeked(&mut self, peeked: SpannedToken<'de>) {
self.peeked = Some(peeked)
fn push_peeked(&mut self, peeked: SpannedToken<'de>) {
self.peeked.push_back(peeked)
}
fn parse_unsigned<T>(&mut self) -> Result<T>
@ -125,14 +93,17 @@ impl<'de> Deserializer<'de> {
let span = token.span.clone();
let int = self.parser.parse_int_token(token)?;
if int < 0 {
Err(SpannedError::new(
ParseError::InvalidIntLiteral(ParseIntError::UnexpectedNegative),
Err(ParseError::new(
RawParseError::InvalidIntLiteral(ParseIntError::UnexpectedNegative),
span,
)
.into())
} else {
Ok(T::try_from(int).map_err(|_| {
SpannedError::new(ParseError::InvalidIntLiteral(ParseIntError::Overflow), span)
ParseError::new(
RawParseError::InvalidIntLiteral(ParseIntError::Overflow),
span,
)
})?)
}
}
@ -145,7 +116,10 @@ impl<'de> Deserializer<'de> {
let span = token.span.clone();
Ok(
T::try_from(self.parser.parse_int_token(token)?).map_err(|_| {
SpannedError::new(ParseError::InvalidIntLiteral(ParseIntError::Overflow), span)
ParseError::new(
RawParseError::InvalidIntLiteral(ParseIntError::Overflow),
span,
)
})?,
)
}
@ -162,7 +136,7 @@ impl<'de> Deserializer<'de> {
}
impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
type Error = SerdeParseError;
type Error = ParseError;
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
where
@ -183,7 +157,7 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
Token::LiteralString => self.deserialize_string(visitor),
Token::Integer => self.deserialize_i64(visitor),
Token::Float => self.deserialize_f64(visitor),
Token::Array | Token::SquareOpen => self.deserialize_seq(visitor),
Token::Array | Token::SquareOpen => self.deserialize_map(visitor),
_ => unreachable!(),
}
}
@ -370,7 +344,10 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
.next_token()
.expect_token(&[Token::Array, Token::SquareOpen])?;
let syntax = match token.token {
Token::Array => ArraySyntax::Long,
Token::Array => {
self.next_token().expect_token(&[Token::BracketOpen])?;
ArraySyntax::Long
}
Token::SquareOpen => ArraySyntax::Short,
_ => unreachable!(),
};
@ -406,7 +383,10 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
.next_token()
.expect_token(&[Token::Array, Token::SquareOpen])?;
let syntax = match token.token {
Token::Array => ArraySyntax::Long,
Token::Array => {
self.next_token().expect_token(&[Token::BracketOpen])?;
ArraySyntax::Long
}
Token::SquareOpen => ArraySyntax::Short,
_ => unreachable!(),
};
@ -495,7 +475,7 @@ impl<'source, 'a> ArrayWalker<'source, 'a> {
}
impl<'de, 'a> SeqAccess<'de> for ArrayWalker<'de, 'a> {
type Error = SerdeParseError;
type Error = ParseError;
fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
where
@ -513,8 +493,14 @@ impl<'de, 'a> SeqAccess<'de> for ArrayWalker<'de, 'a> {
Token::Null,
Token::Array,
Token::SquareOpen,
self.syntax.close_bracket(),
])?;
if token.token == self.syntax.close_bracket() {
self.done = true;
return Ok(None);
}
let next = self.de.next_token().expect_token(&[
self.syntax.close_bracket(),
Token::Comma,
@ -528,7 +514,7 @@ impl<'de, 'a> SeqAccess<'de> for ArrayWalker<'de, 'a> {
let key = self.de.parser.parse_array_key(token)?;
match key {
Key::Int(key) if key == self.next_int_key => Ok(()),
_ => Err(ParseError::UnexpectedArrayKey).with_span(span),
_ => Err(RawParseError::UnexpectedArrayKey).with_span(span),
}?;
self.next_int_key += 1;
let value = self.de.next_token().expect_token(&[
@ -556,15 +542,14 @@ impl<'de, 'a> SeqAccess<'de> for ArrayWalker<'de, 'a> {
_ => unreachable!(),
};
dbg!(value_token.slice());
// Deserialize an array element.
self.de.set_peeked(value_token);
self.de.push_peeked(value_token);
seed.deserialize(&mut *self.de).map(Some)
}
}
impl<'de, 'a> MapAccess<'de> for ArrayWalker<'de, 'a> {
type Error = SerdeParseError;
type Error = ParseError;
fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
where
@ -580,20 +565,46 @@ impl<'de, 'a> MapAccess<'de> for ArrayWalker<'de, 'a> {
Token::Float,
Token::LiteralString,
Token::Null,
self.syntax.close_bracket(),
])?;
self.de.next_token().expect_token(&[Token::Arrow])?;
if token.token == self.syntax.close_bracket() {
self.done = true;
return Ok(None);
}
let next = self.de.next_token().expect_token(&[
Token::Arrow,
Token::Comma,
self.syntax.close_bracket(),
])?;
match next.token {
Token::Arrow => {
// Deserialize a map key.
self.de.set_peeked(token);
if let Key::Int(int_key) = self.de.parser.parse_array_key(token.clone())? {
self.next_int_key = int_key + 1;
}
self.de.push_peeked(token);
seed.deserialize(&mut *self.de).map(Some)
}
_ => {
// implicit key
let key = self.next_int_key;
self.next_int_key += 1;
self.de.push_peeked(token);
self.de.push_peeked(next);
seed.deserialize(format!("{}", key).into_deserializer())
.map(Some)
}
}
}
fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
where
V: DeserializeSeed<'de>,
{
let token = self.de.next_token().expect_token(&[
self.de.peek_token().expect_token(&[
Token::Bool,
Token::Integer,
Token::Float,
@ -604,7 +615,6 @@ impl<'de, 'a> MapAccess<'de> for ArrayWalker<'de, 'a> {
])?;
// Deserialize a map key.
self.de.set_peeked(token);
let value = seed.deserialize(&mut *self.de)?;
let next = self
@ -635,7 +645,7 @@ impl<'a, 'de> Enum<'a, 'de> {
// Note that all enum deserialization methods in Serde refer exclusively to the
// "externally tagged" enum representation.
impl<'de, 'a> EnumAccess<'de> for Enum<'a, 'de> {
type Error = SerdeParseError;
type Error = ParseError;
type Variant = Self;
fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant)>
@ -651,7 +661,7 @@ impl<'de, 'a> EnumAccess<'de> for Enum<'a, 'de> {
// `VariantAccess` is provided to the `Visitor` to give it the ability to see
// the content of the single variant that it decided to deserialize.
impl<'de, 'a> VariantAccess<'de> for Enum<'a, 'de> {
type Error = SerdeParseError;
type Error = ParseError;
fn unit_variant(self) -> Result<()> {
self.de.next_token().expect_token(&[Token::LiteralString])?;
@ -692,14 +702,10 @@ mod tests {
{
match super::from_str(source) {
Ok(res) => Ok(res),
Err(super::SerdeParseError::Spanned(err)) => {
let with_source = err.with_source(source);
eprintln!("{}", with_source);
Err(super::SerdeParseError::Spanned(with_source.spanned()))
}
Err(err) => {
eprintln!("{}", err);
Err(err)
let sourced = err.with_source(source);
eprintln!("{}", sourced);
Err(sourced.into_inner())
}
}
}

134
tests/tests.rs Normal file
View file

@ -0,0 +1,134 @@
use php_literal_parser::{from_str, Key, ParseError, Value};
// End-to-end check that `from_str` turns PHP literals into the generic `Value` type:
// scalars, `array(...)` / `[...]` syntax, implicit and explicit keys, nesting, and the
// various numeric literal spellings.
#[test]
fn test_parse_value() {
    use maplit::hashmap;

    // Parse `source` into a `Value`. On failure the error is rendered against the source
    // text and printed to stderr before the bare `ParseError` is handed back.
    fn parse_reporting(source: &str) -> Result<Value, ParseError> {
        from_str::<Value>(source).map_err(|err| {
            let sourced = err.with_source(source);
            eprintln!("{}", sourced);
            sourced.into_inner()
        })
    }

    // Plain scalars.
    let plain_scalars = vec![
        (Value::Bool(true), "true"),
        (Value::Bool(false), "false"),
        (Value::Int(12), "12"),
        (Value::Int(-1), "-1"),
        (Value::Float(1.12), "1.12"),
        (Value::String("test".to_string()), r#""test""#),
    ];
    for (expected, source) in plain_scalars {
        assert_eq!(expected, parse_reporting(source).unwrap());
    }

    // Empty array.
    assert_eq!(
        Value::Array(hashmap! {}),
        parse_reporting(r#"array()"#).unwrap()
    );

    // Implicit keys count up from zero, with or without a trailing comma.
    for source in [r#"array(3,4,5)"#, r#"array(3,4,5,)"#].iter() {
        assert_eq!(
            Value::Array(hashmap! {
                Key::Int(0) => Value::Int(3),
                Key::Int(1) => Value::Int(4),
                Key::Int(2) => Value::Int(5),
            }),
            parse_reporting(source).unwrap()
        );
    }

    // Explicit integer keys are kept as written.
    assert_eq!(
        Value::Array(hashmap! {
            Key::Int(1) => Value::Int(3),
            Key::Int(3) => Value::Int(4),
            Key::Int(5) => Value::Int(5),
        }),
        parse_reporting(r#"array(1=>3,3=>4,5=>5)"#).unwrap()
    );

    // A first key of 1 -- spelled as an int, a numeric string, a float or `true` -- makes
    // the following implicit keys continue at 2.
    let first_key_spellings = [
        r#"array(1=>3,4,5)"#,
        r#"array("1"=>3,4,5)"#,
        r#"array(1.5=>3,4,5)"#,
        r#"array(true=>3,4,5)"#,
    ];
    for source in first_key_spellings.iter() {
        assert_eq!(
            Value::Array(hashmap! {
                Key::Int(1) => Value::Int(3),
                Key::Int(2) => Value::Int(4),
                Key::Int(3) => Value::Int(5),
            }),
            parse_reporting(source).unwrap()
        );
    }

    // A string key in between does not disturb the implicit integer counter.
    assert_eq!(
        Value::Array(hashmap! {
            Key::Int(1) => Value::Int(3),
            Key::String("foo".into()) => Value::Int(4),
            Key::Int(2) => Value::Int(5),
        }),
        parse_reporting(r#"array(1=>3,"foo" => 4,5)"#).unwrap()
    );

    // Nested arrays, in both the long and the short syntax.
    assert_eq!(
        Value::Array(hashmap! {
            Key::String("foo".into()) => Value::Bool(true),
            Key::String("nested".into()) => Value::Array(hashmap! {
                Key::String("foo".into()) => Value::Bool(false),
            }),
        }),
        parse_reporting(r#"array("foo" => true, "nested" => array ('foo' => false))"#).unwrap()
    );
    assert_eq!(
        Value::Array(hashmap! {
            Key::String("foo".into()) => Value::Bool(true),
            Key::String("nested".into()) => Value::Array(hashmap! {
                Key::String("foo".into()) => Value::Null,
            }),
        }),
        parse_reporting(r#"["foo" => true, "nested" => ['foo' => null]]"#).unwrap()
    );

    // Numeric literal spellings (sign, octal, hex, binary, `_` separators, exponents) and
    // case-insensitive booleans.
    let literal_spellings = vec![
        (Value::Int(-432), r#"-432"#),
        (Value::Int(282), r#"0432"#),
        (Value::Int(26), r#"0x1A"#),
        (Value::Int(3), r#"0b11"#),
        (Value::Int(12345), r#"12_34_5"#),
        (Value::Bool(true), r#"True"#),
        (Value::Float(-432.0), r#"-432.0"#),
        (Value::Float(0.12), r#".12"#),
        (Value::Float(1000.0), r#"10e2"#),
        (Value::Float(1.0), r#"10e-1"#),
        (Value::Float(1234.5), r#"12_34.5"#),
    ];
    for (expected, source) in literal_spellings {
        assert_eq!(expected, parse_reporting(source).unwrap());
    }

    // Non-integer keys: a numeric string becomes an int, `null` the empty string, and
    // `true` / `false` become 1 / 0.
    assert_eq!(
        Value::Array(hashmap! {
            Key::Int(2) => Value::Int(3),
            Key::String("foo".into()) => Value::Int(4),
            Key::String("".into()) => Value::Int(5),
            Key::Int(1) => Value::Int(6),
            Key::Int(0) => Value::Int(7),
        }),
        parse_reporting(r#"array("2"=>3,"foo" => 4, null => 5, true => 6, false => 7)"#).unwrap()
    );
}