serde stuff

This commit is contained in:
Robin Appelman 2020-12-13 22:31:06 +01:00
commit 80a72c3d72
7 changed files with 466 additions and 285 deletions

View file

@ -13,6 +13,8 @@ logos = "0.11"
thiserror = "1.0" thiserror = "1.0"
source-span = "2.2" source-span = "2.2"
memchr = "2.3.4" memchr = "2.3.4"
serde = "1.0"
[dev-dependencies] [dev-dependencies]
maplit = "1.0.2" maplit = "1.0.2"
serde_derive = "1.0"

View file

@ -70,6 +70,12 @@ pub struct SourceSpannedError<'source, T> {
source: &'source str, source: &'source str,
} }
impl<'source, T: Error + Debug + 'static> SourceSpannedError<'source, T> {
pub fn spanned(self) -> SpannedError<T> {
SpannedError::new(self.error, self.span)
}
}
const METRICS: DefaultMetrics = DefaultMetrics::with_tab_stop(4); const METRICS: DefaultMetrics = DefaultMetrics::with_tab_stop(4);
impl<'source, T: Error + Debug> Display for SourceSpannedError<'source, T> { impl<'source, T: Error + Debug> Display for SourceSpannedError<'source, T> {
@ -122,6 +128,10 @@ pub enum ParseError {
InvalidFloatLiteral(#[from] ParseFloatError), InvalidFloatLiteral(#[from] ParseFloatError),
#[error("Invalid string literal")] #[error("Invalid string literal")]
InvalidStringLiteral, InvalidStringLiteral,
#[error("Array key not valid for this position")]
UnexpectedArrayKey,
#[error("Trailing characters after parsing")]
TrailingCharacters,
} }
impl From<UnescapeError> for ParseError { impl From<UnescapeError> for ParseError {
@ -190,6 +200,20 @@ impl<'source> ExpectToken<'source> for Option<SpannedToken<'source>> {
} }
} }
impl<'a, 'source> ExpectToken<'source> for Option<&'a SpannedToken<'source>> {
fn expect_token(
self,
expected: &[Token],
) -> Result<SpannedToken<'source>, SpannedError<ParseError>> {
self.ok_or_else(|| UnexpectedTokenError {
expected: expected.to_vec(),
found: None,
})
.with_span(usize::max_value()..usize::max_value())
.and_then(|token| token.clone().expect_token(expected))
}
}
impl<'source> ExpectToken<'source> for SpannedToken<'source> { impl<'source> ExpectToken<'source> for SpannedToken<'source> {
fn expect_token( fn expect_token(
self, self,

View file

@ -166,6 +166,7 @@ fn test_lex_float() {
assert_eq!(lex.next(), None); assert_eq!(lex.next(), None);
} }
#[derive(Clone, Debug)]
pub struct SpannedToken<'source> { pub struct SpannedToken<'source> {
pub token: Token, pub token: Token,
pub span: Span, pub span: Span,

View file

@ -23,6 +23,7 @@ mod error;
mod lexer; mod lexer;
mod num; mod num;
mod parser; mod parser;
mod serde;
mod string; mod string;
pub use error::{ParseError, SpannedError}; pub use error::{ParseError, SpannedError};

View file

@ -8,6 +8,8 @@ pub enum ParseIntError {
InvalidDigit, InvalidDigit,
#[error("number too large or small to fit in target type")] #[error("number too large or small to fit in target type")]
Overflow, Overflow,
#[error("unexpected negative number")]
UnexpectedNegative,
} }
/// Mostly copied from std /// Mostly copied from std

View file

@ -5,6 +5,7 @@ use crate::string::{is_array_key_numeric, parse_string};
use crate::{Key, Value}; use crate::{Key, Value};
use logos::Logos; use logos::Logos;
use std::collections::HashMap; use std::collections::HashMap;
use std::iter::Peekable;
use std::num::ParseFloatError; use std::num::ParseFloatError;
/// Parse a php literal /// Parse a php literal
@ -30,16 +31,24 @@ pub fn parse(source: &str) -> Result<Value, SpannedError<ParseError>> {
} }
pub struct Parser<'source> { pub struct Parser<'source> {
tokens: TokenStream<'source>, tokens: Peekable<TokenStream<'source>>,
} }
impl<'source> Parser<'source> { impl<'source> Parser<'source> {
pub fn new(source: &'source str) -> Self { pub fn new(source: &'source str) -> Self {
Parser { Parser {
tokens: TokenStream::new(Token::lexer(source)), tokens: TokenStream::new(Token::lexer(source)).peekable(),
} }
} }
/// Pulls the next token out of the underlying token stream.
///
/// Returns whatever the stream's iterator yields, i.e. `None` when it has
/// no further tokens.
pub fn next_token(&mut self) -> Option<SpannedToken<'source>> {
    Iterator::next(&mut self.tokens)
}
/// Advances the token stream by one token and throws that token away.
pub fn eat_token(&mut self) {
    drop(self.tokens.next());
}
pub fn run(&mut self) -> Result<Value, SpannedError<ParseError>> { pub fn run(&mut self) -> Result<Value, SpannedError<ParseError>> {
let token = self.tokens.next().expect_token(&[ let token = self.tokens.next().expect_token(&[
Token::Bool, Token::Bool,
@ -55,10 +64,10 @@ impl<'source> Parser<'source> {
pub fn parse_any(&mut self, token: SpannedToken) -> Result<Value, SpannedError<ParseError>> { pub fn parse_any(&mut self, token: SpannedToken) -> Result<Value, SpannedError<ParseError>> {
let value = match token.token { let value = match token.token {
Token::Bool => Value::Bool(self.parse_bool(token)?), Token::Bool => Value::Bool(self.parse_bool_token(token)?),
Token::Integer => Value::Int(self.parse_int(token)?), Token::Integer => Value::Int(self.parse_int_token(token)?),
Token::Float => Value::Float(self.parse_float(token)?), Token::Float => Value::Float(self.parse_float_token(token)?),
Token::LiteralString => Value::String(self.parse_string(token)?), Token::LiteralString => Value::String(self.parse_string_token(token)?),
Token::Null => Value::Null, Token::Null => Value::Null,
Token::Array => Value::Array(self.parse_array(ArraySyntax::Long)?), Token::Array => Value::Array(self.parse_array(ArraySyntax::Long)?),
Token::SquareOpen => Value::Array(self.parse_array(ArraySyntax::Short)?), Token::SquareOpen => Value::Array(self.parse_array(ArraySyntax::Short)?),
@ -68,7 +77,7 @@ impl<'source> Parser<'source> {
Ok(value) Ok(value)
} }
pub fn parse_bool(&self, token: SpannedToken) -> Result<bool, SpannedError<ParseError>> { pub fn parse_bool_token(&self, token: SpannedToken) -> Result<bool, SpannedError<ParseError>> {
token token
.slice() .slice()
.to_ascii_lowercase() .to_ascii_lowercase()
@ -76,15 +85,18 @@ impl<'source> Parser<'source> {
.with_span(token.span) .with_span(token.span)
} }
pub fn parse_int(&self, token: SpannedToken) -> Result<i64, SpannedError<ParseError>> { pub fn parse_int_token(&self, token: SpannedToken) -> Result<i64, SpannedError<ParseError>> {
parse_int(token.slice()).with_span(token.span) parse_int(token.slice()).with_span(token.span)
} }
pub fn parse_float(&self, token: SpannedToken) -> Result<f64, SpannedError<ParseError>> { pub fn parse_float_token(&self, token: SpannedToken) -> Result<f64, SpannedError<ParseError>> {
parse_float(token.slice()).with_span(token.span) parse_float(token.slice()).with_span(token.span)
} }
pub fn parse_string(&self, token: SpannedToken) -> Result<String, SpannedError<ParseError>> { pub fn parse_string_token(
&self,
token: SpannedToken,
) -> Result<String, SpannedError<ParseError>> {
parse_string(token.slice()).with_span(token.span) parse_string(token.slice()).with_span(token.span)
} }
@ -192,8 +204,8 @@ fn parse_float(literal: &str) -> Result<f64, ParseFloatError> {
#[derive(Default)] #[derive(Default)]
struct ArrayBuilder { struct ArrayBuilder {
next_int_key: i64,
data: HashMap<Key, Value>, data: HashMap<Key, Value>,
next_int_key: i64,
} }
impl ArrayBuilder { impl ArrayBuilder {
@ -211,14 +223,14 @@ impl ArrayBuilder {
} }
} }
#[derive(Eq, PartialEq)] #[derive(Eq, PartialEq, Copy, Clone)]
pub enum ArraySyntax { pub enum ArraySyntax {
Short, Short,
Long, Long,
} }
impl ArraySyntax { impl ArraySyntax {
fn close_bracket(&self) -> Token { pub fn close_bracket(&self) -> Token {
match self { match self {
ArraySyntax::Long => Token::BracketClose, ArraySyntax::Long => Token::BracketClose,
ArraySyntax::Short => Token::SquareClose, ArraySyntax::Short => Token::SquareClose,

View file

@ -1,32 +1,78 @@
use std::ops::{AddAssign, MulAssign, Neg};
use serde::de::{ use serde::de::{
self, DeserializeSeed, EnumAccess, IntoDeserializer, MapAccess, SeqAccess, VariantAccess, self, DeserializeSeed, EnumAccess, IntoDeserializer, MapAccess, SeqAccess, VariantAccess,
Visitor, Visitor,
}; };
use serde::Deserialize; use serde::Deserialize;
use crate::lexer::Token; use crate::error::{ExpectToken, ResultExt};
use crate::parser::{SpannedToken, TokenStream}; use crate::lexer::{SpannedToken, Token};
use crate::ParseError; use crate::num::ParseIntError;
use error::{Error, Result}; use crate::parser::{ArraySyntax, Parser};
use logos::Lexer; use crate::{Key, ParseError, SpannedError};
use std::iter::Peekable; use serde::export::TryFrom;
use std::error::Error;
use std::fmt::{Display, Formatter};
type Result<T> = std::error::Result<T, ParseError>; #[derive(Debug)]
/// Error type used by the serde deserializer defined in this module.
pub enum SerdeParseError {
    /// A parse error that carries no span.
    Parse(ParseError),
    /// A parse error together with the span it was reported for.
    Spanned(SpannedError<ParseError>),
    /// A free-form message, as produced through `serde::de::Error::custom`.
    Custom(String),
}
impl From<SpannedError<ParseError>> for SerdeParseError {
fn from(err: SpannedError<ParseError>) -> Self {
SerdeParseError::Spanned(err)
}
}
impl From<ParseError> for SerdeParseError {
fn from(err: ParseError) -> Self {
SerdeParseError::Parse(err)
}
}
impl Display for SerdeParseError {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
SerdeParseError::Spanned(err) => write!(f, "{}", err),
SerdeParseError::Parse(err) => write!(f, "{}", err),
SerdeParseError::Custom(err) => write!(f, "{}", err),
}
}
}
impl Error for SerdeParseError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
match self {
SerdeParseError::Spanned(err) => Some(err),
SerdeParseError::Parse(err) => Some(err),
SerdeParseError::Custom(_) => None,
}
}
}
impl serde::de::Error for SerdeParseError {
    /// Builds a `Custom` error holding the rendered text of `msg`.
    fn custom<T: Display>(msg: T) -> Self {
        let rendered = msg.to_string();
        Self::Custom(rendered)
    }
}
type Result<T> = std::result::Result<T, SerdeParseError>;
pub struct Deserializer<'de> { pub struct Deserializer<'de> {
input: &'de str, parser: Parser<'de>,
token_stream: Peekable<TokenStream<'de>>, peeked: Option<SpannedToken<'de>>,
} }
impl<'de> Deserializer<'de> { impl<'de> Deserializer<'de> {
pub fn from_str(input: &'de str) -> Self { pub fn from_str(input: &'de str) -> Self {
let lexer = Token::lexer(source);
let token_stream = TokenStream::new(lexer, input);
Deserializer { Deserializer {
input, parser: Parser::new(input),
token_stream: token_stream.peekable(), peeked: None,
} }
} }
} }
@ -37,104 +83,108 @@ where
{ {
let mut deserializer = Deserializer::from_str(s); let mut deserializer = Deserializer::from_str(s);
let t = T::deserialize(&mut deserializer)?; let t = T::deserialize(&mut deserializer)?;
if deserializer.input.is_empty() { if deserializer.next_token().is_none() {
Ok(t) Ok(t)
} else { } else {
Err(Error::TrailingCharacters) Err(ParseError::TrailingCharacters.into())
} }
} }
impl<'de> Deserializer<'de> { impl<'de> Deserializer<'de> {
fn parse_bool(&mut self) -> Result<bool> { fn next_token(&mut self) -> Option<SpannedToken<'de>> {
let token = self.token_stream.next() self.peeked.take().or_else(|| self.parser.next_token())
if self.input.starts_with("true") { }
self.input = &self.input["true".len()..];
Ok(true) fn peek_token(&mut self) -> Option<&SpannedToken<'de>> {
} else if self.input.starts_with("false") { if self.peeked.is_none() {
self.input = &self.input["false".len()..]; self.peeked = self.next_token()
Ok(false) }
} else { self.peeked.as_ref()
Err(Error::ExpectedBoolean) }
fn eat_token(&mut self) {
if self.peeked.take().is_none() {
self.parser.eat_token()
} }
} }
// Parse a group of decimal digits as an unsigned integer of type T. fn parse_bool(&mut self) -> Result<bool> {
// let token = self.next_token().expect_token(&[Token::Bool])?;
// This implementation is a bit too lenient, for example `001` is not Ok(self.parser.parse_bool_token(token)?)
// allowed in JSON. Also the various arithmetic operations can overflow and }
// panic or return bogus data. But it is good enough for example code!
/// Stores `peeked` as the buffered token, overwriting any token that was
/// buffered before.
fn set_peeked(&mut self, peeked: SpannedToken<'de>) {
    drop(self.peeked.replace(peeked));
}
fn parse_unsigned<T>(&mut self) -> Result<T> fn parse_unsigned<T>(&mut self) -> Result<T>
where where
T: AddAssign<T> + MulAssign<T> + From<u8>, T: TryFrom<i64>,
{ {
let mut int = match self.next_char()? { let token = self.next_token().expect_token(&[Token::Integer])?;
ch @ '0'..='9' => T::from(ch as u8 - b'0'), let span = token.span.clone();
_ => { let int = self.parser.parse_int_token(token)?;
return Err(Error::ExpectedInteger); if int < 0 {
} Err(SpannedError::new(
}; ParseError::InvalidIntLiteral(ParseIntError::UnexpectedNegative),
loop { span,
match self.input.chars().next() { )
Some(ch @ '0'..='9') => { .into())
self.input = &self.input[1..]; } else {
int *= T::from(10); Ok(T::try_from(int).map_err(|_| {
int += T::from(ch as u8 - b'0'); SpannedError::new(ParseError::InvalidIntLiteral(ParseIntError::Overflow), span)
} })?)
_ => {
return Ok(int);
}
}
} }
} }
// Parse a possible minus sign followed by a group of decimal digits as a
// signed integer of type T.
fn parse_signed<T>(&mut self) -> Result<T> fn parse_signed<T>(&mut self) -> Result<T>
where where
T: Neg<Output = T> + AddAssign<T> + MulAssign<T> + From<i8>, T: TryFrom<i64>,
{ {
// Optional minus sign, delegate to `parse_unsigned`, negate if negative. let token = self.next_token().expect_token(&[Token::Integer])?;
unimplemented!() let span = token.span.clone();
Ok(
T::try_from(self.parser.parse_int_token(token)?).map_err(|_| {
SpannedError::new(ParseError::InvalidIntLiteral(ParseIntError::Overflow), span)
})?,
)
} }
// Parse a string until the next '"' character. fn parse_float(&mut self) -> Result<f64> {
// let token = self.next_token().expect_token(&[Token::Float])?;
// Makes no attempt to handle escape sequences. What did you expect? This is Ok(self.parser.parse_float_token(token)?)
// example code! }
fn parse_string(&mut self) -> Result<&'de str> {
if self.next_char()? != '"' { fn parse_string(&mut self) -> Result<String> {
return Err(Error::ExpectedString); let token = self.next_token().expect_token(&[Token::LiteralString])?;
} Ok(self.parser.parse_string_token(token)?)
match self.input.find('"') {
Some(len) => {
let s = &self.input[..len];
self.input = &self.input[len + 1..];
Ok(s)
}
None => Err(Error::Eof),
}
} }
} }
impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> { impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
type Error = ParseError; type Error = SerdeParseError;
// Look at the input data to decide what Serde data model type to
// deserialize as. Not all data formats are able to support this operation.
// Formats that support `deserialize_any` are known as self-describing.
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value> fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
{ {
match self.peek_char()? { let peek = self.peek_token().expect_token(&[
'n' => self.deserialize_unit(visitor), Token::Null,
't' | 'f' => self.deserialize_bool(visitor), Token::Bool,
'"' => self.deserialize_str(visitor), Token::LiteralString,
'0'..='9' => self.deserialize_u64(visitor), Token::Integer,
'-' => self.deserialize_i64(visitor), Token::Float,
'[' => self.deserialize_seq(visitor), Token::Array,
'{' => self.deserialize_map(visitor), Token::SquareOpen,
_ => Err(Error::Syntax), ])?;
match peek.token {
Token::Null => self.deserialize_unit(visitor),
Token::Bool => self.deserialize_bool(visitor),
Token::LiteralString => self.deserialize_string(visitor),
Token::Integer => self.deserialize_i64(visitor),
Token::Float => self.deserialize_f64(visitor),
Token::Array | Token::SquareOpen => self.deserialize_seq(visitor),
_ => unreachable!(),
} }
} }
@ -203,20 +253,18 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
visitor.visit_u64(self.parse_unsigned()?) visitor.visit_u64(self.parse_unsigned()?)
} }
// Float parsing is stupidly hard. fn deserialize_f32<V>(self, visitor: V) -> Result<V::Value>
fn deserialize_f32<V>(self, _visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
{ {
unimplemented!() visitor.visit_f32(self.parse_float()? as f32)
} }
// Float parsing is stupidly hard. fn deserialize_f64<V>(self, visitor: V) -> Result<V::Value>
fn deserialize_f64<V>(self, _visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
{ {
unimplemented!() visitor.visit_f64(self.parse_float()?)
} }
// The `Serializer` implementation on the previous page serialized chars as // The `Serializer` implementation on the previous page serialized chars as
@ -235,14 +283,15 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
where where
V: Visitor<'de>, V: Visitor<'de>,
{ {
visitor.visit_borrowed_str(self.parse_string()?) let str = self.parse_string()?;
visitor.visit_str(str.as_str())
} }
fn deserialize_string<V>(self, visitor: V) -> Result<V::Value> fn deserialize_string<V>(self, visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
{ {
self.deserialize_str(visitor) visitor.visit_string(self.parse_string()?)
} }
// The `Serializer` implementation on the previous page serialized byte // The `Serializer` implementation on the previous page serialized byte
@ -251,14 +300,15 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
where where
V: Visitor<'de>, V: Visitor<'de>,
{ {
unimplemented!() todo!()
} }
fn deserialize_byte_buf<V>(self, _visitor: V) -> Result<V::Value> fn deserialize_byte_buf<V>(self, _visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
{ {
unimplemented!() todo!()
// visitor.visit_string(self.parse_string()?.to_vec())
} }
// An absent optional is represented as the JSON `null` and a present // An absent optional is represented as the JSON `null` and a present
@ -273,28 +323,31 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
where where
V: Visitor<'de>, V: Visitor<'de>,
{ {
if self.input.starts_with("null") { let token = self.peek_token().expect_token(&[
self.input = &self.input["null".len()..]; Token::Null,
Token::Bool,
Token::LiteralString,
Token::Integer,
Token::Float,
Token::Array,
Token::SquareOpen,
])?;
if token.token == Token::Null {
let _ = self.next_token();
visitor.visit_none() visitor.visit_none()
} else { } else {
visitor.visit_some(self) visitor.visit_some(self)
} }
} }
// In Serde, unit means an anonymous value containing no data.
fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value> fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
{ {
if self.input.starts_with("null") { self.next_token().expect_token(&[Token::Null])?;
self.input = &self.input["null".len()..]; visitor.visit_unit()
visitor.visit_unit()
} else {
Err(Error::ExpectedNull)
}
} }
// Unit struct means a named value containing no data.
fn deserialize_unit_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value> fn deserialize_unit_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
@ -302,9 +355,6 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
self.deserialize_unit(visitor) self.deserialize_unit(visitor)
} }
// As is done here, serializers are encouraged to treat newtype structs as
// insignificant wrappers around the data they contain. That means not
// parsing anything other than the contained value.
fn deserialize_newtype_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value> fn deserialize_newtype_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
@ -312,34 +362,23 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
visitor.visit_newtype_struct(self) visitor.visit_newtype_struct(self)
} }
// Deserialization of compound types like sequences and maps happens by
// passing the visitor an "Access" object that gives it the ability to
// iterate through the data contained in the sequence.
fn deserialize_seq<V>(mut self, visitor: V) -> Result<V::Value> fn deserialize_seq<V>(mut self, visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
{ {
// Parse the opening bracket of the sequence. let token = self
if self.next_char()? == '[' { .next_token()
// Give the visitor access to each element of the sequence. .expect_token(&[Token::Array, Token::SquareOpen])?;
let value = visitor.visit_seq(CommaSeparated::new(&mut self))?; let syntax = match token.token {
// Parse the closing bracket of the sequence. Token::Array => ArraySyntax::Long,
if self.next_char()? == ']' { Token::SquareOpen => ArraySyntax::Short,
Ok(value) _ => unreachable!(),
} else { };
Err(Error::ExpectedArrayEnd)
} let value = visitor.visit_seq(ArrayWalker::new(&mut self, syntax))?;
} else { Ok(value)
Err(Error::ExpectedArray)
}
} }
// Tuples look just like sequences in JSON. Some formats may be able to
// represent tuples more efficiently.
//
// As indicated by the length parameter, the `Deserialize` implementation
// for a tuple in the Serde data model is required to know the length of the
// tuple before even looking at the input data.
fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value> fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
@ -347,7 +386,6 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
self.deserialize_seq(visitor) self.deserialize_seq(visitor)
} }
// Tuple structs look just like sequences in JSON.
fn deserialize_tuple_struct<V>( fn deserialize_tuple_struct<V>(
self, self,
_name: &'static str, _name: &'static str,
@ -360,34 +398,23 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
self.deserialize_seq(visitor) self.deserialize_seq(visitor)
} }
// Much like `deserialize_seq` but calls the visitors `visit_map` method
// with a `MapAccess` implementation, rather than the visitor's `visit_seq`
// method with a `SeqAccess` implementation.
fn deserialize_map<V>(mut self, visitor: V) -> Result<V::Value> fn deserialize_map<V>(mut self, visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
{ {
// Parse the opening brace of the map. let token = self
if self.next_char()? == '{' { .next_token()
// Give the visitor access to each entry of the map. .expect_token(&[Token::Array, Token::SquareOpen])?;
let value = visitor.visit_map(CommaSeparated::new(&mut self))?; let syntax = match token.token {
// Parse the closing brace of the map. Token::Array => ArraySyntax::Long,
if self.next_char()? == '}' { Token::SquareOpen => ArraySyntax::Short,
Ok(value) _ => unreachable!(),
} else { };
Err(Error::ExpectedMapEnd)
} let value = visitor.visit_map(ArrayWalker::new(&mut self, syntax))?;
} else { Ok(value)
Err(Error::ExpectedMap)
}
} }
// Structs look just like maps in JSON.
//
// Notice the `fields` parameter - a "struct" in the Serde data model means
// that the `Deserialize` implementation is required to know what the fields
// are before even looking at the input data. Any key-value pairing in which
// the fields cannot be known ahead of time is probably a map.
fn deserialize_struct<V>( fn deserialize_struct<V>(
self, self,
_name: &'static str, _name: &'static str,
@ -409,27 +436,31 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
where where
V: Visitor<'de>, V: Visitor<'de>,
{ {
if self.peek_char()? == '"' { // panic!("a");
// Visit a unit variant. let token = self.peek_token().expect_token(&[
visitor.visit_enum(self.parse_string()?.into_deserializer()) Token::LiteralString,
} else if self.next_char()? == '{' { Token::Array,
// Visit a newtype variant, tuple variant, or struct variant. Token::SquareOpen,
let value = visitor.visit_enum(Enum::new(self))?; ])?;
// Parse the matching close brace. match token.token {
if self.next_char()? == '}' { Token::LiteralString => visitor.visit_enum(self.parse_string()?.into_deserializer()),
Token::Array | Token::SquareOpen => {
self.eat_token();
let syntax = if token.token == Token::Array {
self.next_token().expect_token(&[Token::BracketOpen])?;
ArraySyntax::Long
} else {
ArraySyntax::Short
};
let value = visitor.visit_enum(Enum::new(self))?;
self.next_token().expect_token(&[syntax.close_bracket()])?;
Ok(value) Ok(value)
} else {
Err(Error::ExpectedMapEnd)
} }
} else { _ => unreachable!(),
Err(Error::ExpectedEnum)
} }
} }
// An identifier in Serde is the type that identifies a field of a struct or
// the variant of an enum. In JSON, struct fields and enum variants are
// represented as strings. In other formats they may be represented as
// numeric indices.
fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value> fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
@ -437,17 +468,6 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
self.deserialize_str(visitor) self.deserialize_str(visitor)
} }
// Like `deserialize_any` but indicates to the `Deserializer` that it makes
// no difference which `Visitor` method is called because the data is
// ignored.
//
// Some deserializers are able to implement this more efficiently than
// `deserialize_any`, for example by rapidly skipping over matched
// delimiters without paying close attention to the data in between.
//
// Some formats are not able to implement this at all. Formats that can
// implement `deserialize_any` and `deserialize_ignored_any` are known as
// self-describing.
fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value> fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
@ -456,62 +476,116 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
} }
} }
// In order to handle commas correctly when deserializing a JSON array or map, struct ArrayWalker<'source, 'a> {
// we need to track whether we are on the first element or past the first de: &'a mut Deserializer<'source>,
// element. next_int_key: i64,
struct CommaSeparated<'a, 'de: 'a> { syntax: ArraySyntax,
de: &'a mut Deserializer<'de>, done: bool,
first: bool,
} }
impl<'a, 'de> CommaSeparated<'a, 'de> { impl<'source, 'a> ArrayWalker<'source, 'a> {
fn new(de: &'a mut Deserializer<'de>) -> Self { pub fn new(de: &'a mut Deserializer<'source>, syntax: ArraySyntax) -> Self {
CommaSeparated { de, first: true } ArrayWalker {
de,
next_int_key: 0,
syntax,
done: false,
}
} }
} }
// `SeqAccess` is provided to the `Visitor` to give it the ability to iterate impl<'de, 'a> SeqAccess<'de> for ArrayWalker<'de, 'a> {
// through elements of the sequence. type Error = SerdeParseError;
impl<'de, 'a> SeqAccess<'de> for CommaSeparated<'a, 'de> {
type Error = Error;
fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>> fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
where where
T: DeserializeSeed<'de>, T: DeserializeSeed<'de>,
{ {
// Check if there are no more elements. if self.done {
if self.de.peek_char()? == ']' {
return Ok(None); return Ok(None);
} }
// Comma is required before every element except the first.
if !self.first && self.de.next_char()? != ',' { let token = self.de.next_token().expect_token(&[
return Err(Error::ExpectedArrayComma); Token::Bool,
} Token::Integer,
self.first = false; Token::Float,
Token::LiteralString,
Token::Null,
Token::Array,
Token::SquareOpen,
])?;
let next = self.de.next_token().expect_token(&[
self.syntax.close_bracket(),
Token::Comma,
Token::Arrow,
])?;
let value_token = match next.token.clone() {
Token::Comma => token,
Token::Arrow => {
let span = token.span.clone();
let key = self.de.parser.parse_array_key(token)?;
match key {
Key::Int(key) if key == self.next_int_key => Ok(()),
_ => Err(ParseError::UnexpectedArrayKey).with_span(span),
}?;
self.next_int_key += 1;
let value = self.de.next_token().expect_token(&[
Token::Bool,
Token::Integer,
Token::Float,
Token::LiteralString,
Token::Null,
Token::Array,
Token::SquareOpen,
])?;
let next = self
.de
.next_token()
.expect_token(&[Token::Comma, self.syntax.close_bracket()])?;
if next.token == self.syntax.close_bracket() {
self.done = true;
}
value
}
peeked_token if peeked_token == self.syntax.close_bracket() => {
self.done = true;
token
}
_ => unreachable!(),
};
dbg!(value_token.slice());
// Deserialize an array element. // Deserialize an array element.
self.de.set_peeked(value_token);
seed.deserialize(&mut *self.de).map(Some) seed.deserialize(&mut *self.de).map(Some)
} }
} }
// `MapAccess` is provided to the `Visitor` to give it the ability to iterate impl<'de, 'a> MapAccess<'de> for ArrayWalker<'de, 'a> {
// through entries of the map. type Error = SerdeParseError;
impl<'de, 'a> MapAccess<'de> for CommaSeparated<'a, 'de> {
type Error = Error;
fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>> fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
where where
K: DeserializeSeed<'de>, K: DeserializeSeed<'de>,
{ {
// Check if there are no more entries. if self.done {
if self.de.peek_char()? == '}' {
return Ok(None); return Ok(None);
} }
// Comma is required before every entry except the first.
if !self.first && self.de.next_char()? != ',' { let token = self.de.next_token().expect_token(&[
return Err(Error::ExpectedMapComma); Token::Bool,
} Token::Integer,
self.first = false; Token::Float,
Token::LiteralString,
Token::Null,
])?;
self.de.next_token().expect_token(&[Token::Arrow])?;
// Deserialize a map key. // Deserialize a map key.
self.de.set_peeked(token);
seed.deserialize(&mut *self.de).map(Some) seed.deserialize(&mut *self.de).map(Some)
} }
@ -519,14 +593,29 @@ impl<'de, 'a> MapAccess<'de> for CommaSeparated<'a, 'de> {
where where
V: DeserializeSeed<'de>, V: DeserializeSeed<'de>,
{ {
// It doesn't make a difference whether the colon is parsed at the end let token = self.de.next_token().expect_token(&[
// of `next_key_seed` or at the beginning of `next_value_seed`. In this Token::Bool,
// case the code is a bit simpler having it here. Token::Integer,
if self.de.next_char()? != ':' { Token::Float,
return Err(Error::ExpectedMapColon); Token::LiteralString,
Token::Null,
Token::Array,
Token::SquareOpen,
])?;
// Deserialize a map key.
self.de.set_peeked(token);
let value = seed.deserialize(&mut *self.de)?;
let next = self
.de
.next_token()
.expect_token(&[Token::Comma, self.syntax.close_bracket()])?;
if next.token == self.syntax.close_bracket() {
self.done = true;
} }
// Deserialize a map value. Ok(value)
seed.deserialize(&mut *self.de)
} }
} }
@ -546,39 +635,29 @@ impl<'a, 'de> Enum<'a, 'de> {
// Note that all enum deserialization methods in Serde refer exclusively to the // Note that all enum deserialization methods in Serde refer exclusively to the
// "externally tagged" enum representation. // "externally tagged" enum representation.
impl<'de, 'a> EnumAccess<'de> for Enum<'a, 'de> { impl<'de, 'a> EnumAccess<'de> for Enum<'a, 'de> {
type Error = Error; type Error = SerdeParseError;
type Variant = Self; type Variant = Self;
fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant)> fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant)>
where where
V: DeserializeSeed<'de>, V: DeserializeSeed<'de>,
{ {
// The `deserialize_enum` method parsed a `{` character so we are
// currently inside of a map. The seed will be deserializing itself from
// the key of the map.
let val = seed.deserialize(&mut *self.de)?; let val = seed.deserialize(&mut *self.de)?;
// Parse the colon separating map key from value. self.de.next_token().expect_token(&[Token::Arrow])?;
if self.de.next_char()? == ':' { Ok((val, self))
Ok((val, self))
} else {
Err(Error::ExpectedMapColon)
}
} }
} }
// `VariantAccess` is provided to the `Visitor` to give it the ability to see // `VariantAccess` is provided to the `Visitor` to give it the ability to see
// the content of the single variant that it decided to deserialize. // the content of the single variant that it decided to deserialize.
impl<'de, 'a> VariantAccess<'de> for Enum<'a, 'de> { impl<'de, 'a> VariantAccess<'de> for Enum<'a, 'de> {
type Error = Error; type Error = SerdeParseError;
// If the `Visitor` expected this variant to be a unit variant, the input
// should have been the plain string case handled in `deserialize_enum`.
fn unit_variant(self) -> Result<()> { fn unit_variant(self) -> Result<()> {
Err(Error::ExpectedString) self.de.next_token().expect_token(&[Token::LiteralString])?;
Ok(())
} }
// Newtype variants are represented in JSON as `{ NAME: VALUE }` so
// deserialize the value here.
fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value> fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value>
where where
T: DeserializeSeed<'de>, T: DeserializeSeed<'de>,
@ -586,8 +665,6 @@ impl<'de, 'a> VariantAccess<'de> for Enum<'a, 'de> {
seed.deserialize(self.de) seed.deserialize(self.de)
} }
// Tuple variants are represented in JSON as `{ NAME: [DATA...] }` so
// deserialize the sequence of data here.
fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value> fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
@ -595,8 +672,6 @@ impl<'de, 'a> VariantAccess<'de> for Enum<'a, 'de> {
de::Deserializer::deserialize_seq(self.de, visitor) de::Deserializer::deserialize_seq(self.de, visitor)
} }
// Struct variants are represented in JSON as `{ NAME: { K: V, ... } }` so
// deserialize the inner map here.
fn struct_variant<V>(self, _fields: &'static [&'static str], visitor: V) -> Result<V::Value> fn struct_variant<V>(self, _fields: &'static [&'static str], visitor: V) -> Result<V::Value>
where where
V: Visitor<'de>, V: Visitor<'de>,
@ -607,45 +682,109 @@ impl<'de, 'a> VariantAccess<'de> for Enum<'a, 'de> {
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
#[test] #[cfg(test)]
fn test_struct() { mod tests {
#[derive(Deserialize, PartialEq, Debug)] use serde_derive::Deserialize;
struct Test {
int: u32, fn from_str<'a, T>(source: &'a str) -> super::Result<T>
seq: Vec<String>, where
T: serde::Deserialize<'a>,
{
match super::from_str(source) {
Ok(res) => Ok(res),
Err(super::SerdeParseError::Spanned(err)) => {
let with_source = err.with_source(source);
eprintln!("{}", with_source);
Err(super::SerdeParseError::Spanned(with_source.spanned()))
}
Err(err) => {
eprintln!("{}", err);
Err(err)
}
}
} }
let j = r#"{"int":1,"seq":["a","b"]}"#; #[test]
let expected = Test { fn test_vec() {
int: 1, let j = r#"["a","b"]"#;
seq: vec!["a".to_owned(), "b".to_owned()], let expected: Vec<String> = vec!["a".to_string(), "b".to_string()];
}; assert_eq!(expected, from_str::<Vec<String>>(j).unwrap());
assert_eq!(expected, from_str(j).unwrap());
}
#[test]
fn test_enum() {
#[derive(Deserialize, PartialEq, Debug)]
enum E {
Unit,
Newtype(u32),
Tuple(u32, u32),
Struct { a: u32 },
} }
let j = r#""Unit""#; #[test]
let expected = E::Unit; fn test_vec_explicit_keys() {
assert_eq!(expected, from_str(j).unwrap()); let j = r#"[0=>"a", 1=>"b"]"#;
let expected: Vec<String> = vec!["a".to_string(), "b".to_string()];
assert_eq!(expected, from_str::<Vec<String>>(j).unwrap());
}
let j = r#"{"Newtype":1}"#; #[test]
let expected = E::Newtype(1); fn test_struct() {
assert_eq!(expected, from_str(j).unwrap()); #[derive(Deserialize, PartialEq, Debug)]
struct Test {
int: u32,
seq: Vec<String>,
}
let j = r#"{"Tuple":[1,2]}"#; let j = r#"["int"=>1,"seq"=>["a","b"]]"#;
let expected = E::Tuple(1, 2); let expected = Test {
assert_eq!(expected, from_str(j).unwrap()); int: 1,
seq: vec!["a".to_owned(), "b".to_owned()],
};
assert_eq!(expected, from_str(j).unwrap());
}
let j = r#"{"Struct":{"a":1}}"#; #[test]
let expected = E::Struct { a: 1 }; fn test_struct_nested() {
assert_eq!(expected, from_str(j).unwrap()); #[derive(Deserialize, PartialEq, Debug)]
struct Inner {
a: f32,
b: bool,
}
#[derive(Deserialize, PartialEq, Debug)]
struct Test {
int: u32,
nested: Inner,
}
let j = r#"["int"=>1,"nested"=>["a" => 1.0, "b" => false]]"#;
let expected = Test {
int: 1,
nested: Inner { a: 1.0, b: false },
};
assert_eq!(expected, from_str(j).unwrap());
}
#[test]
fn test_enum() {
#[derive(Deserialize, PartialEq, Debug)]
enum E {
Unit,
Newtype1(u32),
Newtype2(u32),
Tuple(u32, u32),
Struct { a: u32 },
}
let j = r#""Unit""#;
let expected = E::Unit;
assert_eq!(expected, from_str(j).unwrap());
let j = r#"["Newtype1"=>1]"#;
let expected = E::Newtype1(1);
assert_eq!(expected, from_str(j).unwrap());
let j = r#"["Newtype2"=>1]"#;
let expected = E::Newtype2(1);
assert_eq!(expected, from_str(j).unwrap());
let j = r#"["Tuple"=>[1,2]]"#;
let expected = E::Tuple(1, 2);
assert_eq!(expected, from_str(j).unwrap());
let j = r#"["Struct"=>["a"=>1]]"#;
let expected = E::Struct { a: 1 };
assert_eq!(expected, from_str(j).unwrap());
}
} }