mirror of
https://codeberg.org/icewind/php-literal-parser.git
synced 2026-06-03 18:44:07 +02:00
serde stuff
This commit is contained in:
parent
bf1edefb25
commit
80a72c3d72
7 changed files with 466 additions and 285 deletions
|
|
@ -13,6 +13,8 @@ logos = "0.11"
|
|||
thiserror = "1.0"
|
||||
source-span = "2.2"
|
||||
memchr = "2.3.4"
|
||||
serde = "1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
maplit = "1.0.2"
|
||||
serde_derive = "1.0"
|
||||
24
src/error.rs
24
src/error.rs
|
|
@ -70,6 +70,12 @@ pub struct SourceSpannedError<'source, T> {
|
|||
source: &'source str,
|
||||
}
|
||||
|
||||
impl<'source, T: Error + Debug + 'static> SourceSpannedError<'source, T> {
|
||||
pub fn spanned(self) -> SpannedError<T> {
|
||||
SpannedError::new(self.error, self.span)
|
||||
}
|
||||
}
|
||||
|
||||
const METRICS: DefaultMetrics = DefaultMetrics::with_tab_stop(4);
|
||||
|
||||
impl<'source, T: Error + Debug> Display for SourceSpannedError<'source, T> {
|
||||
|
|
@ -122,6 +128,10 @@ pub enum ParseError {
|
|||
InvalidFloatLiteral(#[from] ParseFloatError),
|
||||
#[error("Invalid string literal")]
|
||||
InvalidStringLiteral,
|
||||
#[error("Array key not valid for this position")]
|
||||
UnexpectedArrayKey,
|
||||
#[error("Trailing characters after parsing")]
|
||||
TrailingCharacters,
|
||||
}
|
||||
|
||||
impl From<UnescapeError> for ParseError {
|
||||
|
|
@ -190,6 +200,20 @@ impl<'source> ExpectToken<'source> for Option<SpannedToken<'source>> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a, 'source> ExpectToken<'source> for Option<&'a SpannedToken<'source>> {
|
||||
fn expect_token(
|
||||
self,
|
||||
expected: &[Token],
|
||||
) -> Result<SpannedToken<'source>, SpannedError<ParseError>> {
|
||||
self.ok_or_else(|| UnexpectedTokenError {
|
||||
expected: expected.to_vec(),
|
||||
found: None,
|
||||
})
|
||||
.with_span(usize::max_value()..usize::max_value())
|
||||
.and_then(|token| token.clone().expect_token(expected))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'source> ExpectToken<'source> for SpannedToken<'source> {
|
||||
fn expect_token(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -166,6 +166,7 @@ fn test_lex_float() {
|
|||
assert_eq!(lex.next(), None);
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct SpannedToken<'source> {
|
||||
pub token: Token,
|
||||
pub span: Span,
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ mod error;
|
|||
mod lexer;
|
||||
mod num;
|
||||
mod parser;
|
||||
mod serde;
|
||||
mod string;
|
||||
|
||||
pub use error::{ParseError, SpannedError};
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ pub enum ParseIntError {
|
|||
InvalidDigit,
|
||||
#[error("number too large or small to fit in target type")]
|
||||
Overflow,
|
||||
#[error("unexpected negative number")]
|
||||
UnexpectedNegative,
|
||||
}
|
||||
|
||||
/// Mostly copied from std
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ use crate::string::{is_array_key_numeric, parse_string};
|
|||
use crate::{Key, Value};
|
||||
use logos::Logos;
|
||||
use std::collections::HashMap;
|
||||
use std::iter::Peekable;
|
||||
use std::num::ParseFloatError;
|
||||
|
||||
/// Parse a php literal
|
||||
|
|
@ -30,16 +31,24 @@ pub fn parse(source: &str) -> Result<Value, SpannedError<ParseError>> {
|
|||
}
|
||||
|
||||
pub struct Parser<'source> {
|
||||
tokens: TokenStream<'source>,
|
||||
tokens: Peekable<TokenStream<'source>>,
|
||||
}
|
||||
|
||||
impl<'source> Parser<'source> {
|
||||
pub fn new(source: &'source str) -> Self {
|
||||
Parser {
|
||||
tokens: TokenStream::new(Token::lexer(source)),
|
||||
tokens: TokenStream::new(Token::lexer(source)).peekable(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn next_token(&mut self) -> Option<SpannedToken<'source>> {
|
||||
self.tokens.next()
|
||||
}
|
||||
|
||||
pub fn eat_token(&mut self) {
|
||||
let _ = self.tokens.next();
|
||||
}
|
||||
|
||||
pub fn run(&mut self) -> Result<Value, SpannedError<ParseError>> {
|
||||
let token = self.tokens.next().expect_token(&[
|
||||
Token::Bool,
|
||||
|
|
@ -55,10 +64,10 @@ impl<'source> Parser<'source> {
|
|||
|
||||
pub fn parse_any(&mut self, token: SpannedToken) -> Result<Value, SpannedError<ParseError>> {
|
||||
let value = match token.token {
|
||||
Token::Bool => Value::Bool(self.parse_bool(token)?),
|
||||
Token::Integer => Value::Int(self.parse_int(token)?),
|
||||
Token::Float => Value::Float(self.parse_float(token)?),
|
||||
Token::LiteralString => Value::String(self.parse_string(token)?),
|
||||
Token::Bool => Value::Bool(self.parse_bool_token(token)?),
|
||||
Token::Integer => Value::Int(self.parse_int_token(token)?),
|
||||
Token::Float => Value::Float(self.parse_float_token(token)?),
|
||||
Token::LiteralString => Value::String(self.parse_string_token(token)?),
|
||||
Token::Null => Value::Null,
|
||||
Token::Array => Value::Array(self.parse_array(ArraySyntax::Long)?),
|
||||
Token::SquareOpen => Value::Array(self.parse_array(ArraySyntax::Short)?),
|
||||
|
|
@ -68,7 +77,7 @@ impl<'source> Parser<'source> {
|
|||
Ok(value)
|
||||
}
|
||||
|
||||
pub fn parse_bool(&self, token: SpannedToken) -> Result<bool, SpannedError<ParseError>> {
|
||||
pub fn parse_bool_token(&self, token: SpannedToken) -> Result<bool, SpannedError<ParseError>> {
|
||||
token
|
||||
.slice()
|
||||
.to_ascii_lowercase()
|
||||
|
|
@ -76,15 +85,18 @@ impl<'source> Parser<'source> {
|
|||
.with_span(token.span)
|
||||
}
|
||||
|
||||
pub fn parse_int(&self, token: SpannedToken) -> Result<i64, SpannedError<ParseError>> {
|
||||
pub fn parse_int_token(&self, token: SpannedToken) -> Result<i64, SpannedError<ParseError>> {
|
||||
parse_int(token.slice()).with_span(token.span)
|
||||
}
|
||||
|
||||
pub fn parse_float(&self, token: SpannedToken) -> Result<f64, SpannedError<ParseError>> {
|
||||
pub fn parse_float_token(&self, token: SpannedToken) -> Result<f64, SpannedError<ParseError>> {
|
||||
parse_float(token.slice()).with_span(token.span)
|
||||
}
|
||||
|
||||
pub fn parse_string(&self, token: SpannedToken) -> Result<String, SpannedError<ParseError>> {
|
||||
pub fn parse_string_token(
|
||||
&self,
|
||||
token: SpannedToken,
|
||||
) -> Result<String, SpannedError<ParseError>> {
|
||||
parse_string(token.slice()).with_span(token.span)
|
||||
}
|
||||
|
||||
|
|
@ -192,8 +204,8 @@ fn parse_float(literal: &str) -> Result<f64, ParseFloatError> {
|
|||
|
||||
#[derive(Default)]
|
||||
struct ArrayBuilder {
|
||||
next_int_key: i64,
|
||||
data: HashMap<Key, Value>,
|
||||
next_int_key: i64,
|
||||
}
|
||||
|
||||
impl ArrayBuilder {
|
||||
|
|
@ -211,14 +223,14 @@ impl ArrayBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Eq, PartialEq)]
|
||||
#[derive(Eq, PartialEq, Copy, Clone)]
|
||||
pub enum ArraySyntax {
|
||||
Short,
|
||||
Long,
|
||||
}
|
||||
|
||||
impl ArraySyntax {
|
||||
fn close_bracket(&self) -> Token {
|
||||
pub fn close_bracket(&self) -> Token {
|
||||
match self {
|
||||
ArraySyntax::Long => Token::BracketClose,
|
||||
ArraySyntax::Short => Token::SquareClose,
|
||||
|
|
|
|||
611
src/serde.rs
611
src/serde.rs
|
|
@ -1,32 +1,78 @@
|
|||
use std::ops::{AddAssign, MulAssign, Neg};
|
||||
|
||||
use serde::de::{
|
||||
self, DeserializeSeed, EnumAccess, IntoDeserializer, MapAccess, SeqAccess, VariantAccess,
|
||||
Visitor,
|
||||
};
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::lexer::Token;
|
||||
use crate::parser::{SpannedToken, TokenStream};
|
||||
use crate::ParseError;
|
||||
use error::{Error, Result};
|
||||
use logos::Lexer;
|
||||
use std::iter::Peekable;
|
||||
use crate::error::{ExpectToken, ResultExt};
|
||||
use crate::lexer::{SpannedToken, Token};
|
||||
use crate::num::ParseIntError;
|
||||
use crate::parser::{ArraySyntax, Parser};
|
||||
use crate::{Key, ParseError, SpannedError};
|
||||
use serde::export::TryFrom;
|
||||
use std::error::Error;
|
||||
use std::fmt::{Display, Formatter};
|
||||
|
||||
type Result<T> = std::error::Result<T, ParseError>;
|
||||
#[derive(Debug)]
|
||||
pub enum SerdeParseError {
|
||||
Parse(ParseError),
|
||||
Spanned(SpannedError<ParseError>),
|
||||
Custom(String),
|
||||
}
|
||||
|
||||
impl From<SpannedError<ParseError>> for SerdeParseError {
    /// Wraps a spanned parse error in the `Spanned` variant.
    fn from(spanned: SpannedError<ParseError>) -> Self {
        Self::Spanned(spanned)
    }
}
|
||||
|
||||
impl From<ParseError> for SerdeParseError {
    /// Wraps a span-less parse error in the `Parse` variant.
    fn from(unspanned: ParseError) -> Self {
        Self::Parse(unspanned)
    }
}
|
||||
|
||||
impl Display for SerdeParseError {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
SerdeParseError::Spanned(err) => write!(f, "{}", err),
|
||||
SerdeParseError::Parse(err) => write!(f, "{}", err),
|
||||
SerdeParseError::Custom(err) => write!(f, "{}", err),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Error for SerdeParseError {
|
||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||
match self {
|
||||
SerdeParseError::Spanned(err) => Some(err),
|
||||
SerdeParseError::Parse(err) => Some(err),
|
||||
SerdeParseError::Custom(_) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl serde::de::Error for SerdeParseError {
|
||||
fn custom<T>(msg: T) -> Self
|
||||
where
|
||||
T: Display,
|
||||
{
|
||||
SerdeParseError::Custom(msg.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
type Result<T> = std::result::Result<T, SerdeParseError>;
|
||||
|
||||
pub struct Deserializer<'de> {
|
||||
input: &'de str,
|
||||
token_stream: Peekable<TokenStream<'de>>,
|
||||
parser: Parser<'de>,
|
||||
peeked: Option<SpannedToken<'de>>,
|
||||
}
|
||||
|
||||
impl<'de> Deserializer<'de> {
|
||||
pub fn from_str(input: &'de str) -> Self {
|
||||
let lexer = Token::lexer(source);
|
||||
let token_stream = TokenStream::new(lexer, input);
|
||||
Deserializer {
|
||||
input,
|
||||
token_stream: token_stream.peekable(),
|
||||
parser: Parser::new(input),
|
||||
peeked: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -37,104 +83,108 @@ where
|
|||
{
|
||||
let mut deserializer = Deserializer::from_str(s);
|
||||
let t = T::deserialize(&mut deserializer)?;
|
||||
if deserializer.input.is_empty() {
|
||||
if deserializer.next_token().is_none() {
|
||||
Ok(t)
|
||||
} else {
|
||||
Err(Error::TrailingCharacters)
|
||||
Err(ParseError::TrailingCharacters.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserializer<'de> {
|
||||
fn parse_bool(&mut self) -> Result<bool> {
|
||||
let token = self.token_stream.next()
|
||||
if self.input.starts_with("true") {
|
||||
self.input = &self.input["true".len()..];
|
||||
Ok(true)
|
||||
} else if self.input.starts_with("false") {
|
||||
self.input = &self.input["false".len()..];
|
||||
Ok(false)
|
||||
} else {
|
||||
Err(Error::ExpectedBoolean)
|
||||
/// Returns the next token, handing out the stashed `peeked` token first and
/// only asking the parser for a new one when nothing is stashed.
fn next_token(&mut self) -> Option<SpannedToken<'de>> {
    match self.peeked.take() {
        Some(stashed) => Some(stashed),
        None => self.parser.next_token(),
    }
}
|
||||
|
||||
/// Returns a reference to the upcoming token without consuming it.
///
/// When nothing is stashed yet, one token is pulled from the parser and
/// kept in `peeked` so the following `next_token` call returns it.
fn peek_token(&mut self) -> Option<&SpannedToken<'de>> {
    let slot = &mut self.peeked;
    if slot.is_none() {
        // Nothing stashed, so the next token can only come from the parser.
        *slot = self.parser.next_token();
    }
    slot.as_ref()
}
|
||||
|
||||
fn eat_token(&mut self) {
|
||||
if self.peeked.take().is_none() {
|
||||
self.parser.eat_token()
|
||||
}
|
||||
}
|
||||
|
||||
// Parse a group of decimal digits as an unsigned integer of type T.
|
||||
//
|
||||
// This implementation is a bit too lenient, for example `001` is not
|
||||
// allowed in JSON. Also the various arithmetic operations can overflow and
|
||||
// panic or return bogus data. But it is good enough for example code!
|
||||
fn parse_bool(&mut self) -> Result<bool> {
|
||||
let token = self.next_token().expect_token(&[Token::Bool])?;
|
||||
Ok(self.parser.parse_bool_token(token)?)
|
||||
}
|
||||
|
||||
/// Stashes `peeked` so that it is the next token handed out, replacing any
/// token that was stashed before.
fn set_peeked(&mut self, peeked: SpannedToken<'de>) {
    let _ = self.peeked.replace(peeked);
}
|
||||
|
||||
fn parse_unsigned<T>(&mut self) -> Result<T>
|
||||
where
|
||||
T: AddAssign<T> + MulAssign<T> + From<u8>,
|
||||
T: TryFrom<i64>,
|
||||
{
|
||||
let mut int = match self.next_char()? {
|
||||
ch @ '0'..='9' => T::from(ch as u8 - b'0'),
|
||||
_ => {
|
||||
return Err(Error::ExpectedInteger);
|
||||
}
|
||||
};
|
||||
loop {
|
||||
match self.input.chars().next() {
|
||||
Some(ch @ '0'..='9') => {
|
||||
self.input = &self.input[1..];
|
||||
int *= T::from(10);
|
||||
int += T::from(ch as u8 - b'0');
|
||||
}
|
||||
_ => {
|
||||
return Ok(int);
|
||||
}
|
||||
}
|
||||
let token = self.next_token().expect_token(&[Token::Integer])?;
|
||||
let span = token.span.clone();
|
||||
let int = self.parser.parse_int_token(token)?;
|
||||
if int < 0 {
|
||||
Err(SpannedError::new(
|
||||
ParseError::InvalidIntLiteral(ParseIntError::UnexpectedNegative),
|
||||
span,
|
||||
)
|
||||
.into())
|
||||
} else {
|
||||
Ok(T::try_from(int).map_err(|_| {
|
||||
SpannedError::new(ParseError::InvalidIntLiteral(ParseIntError::Overflow), span)
|
||||
})?)
|
||||
}
|
||||
}
|
||||
|
||||
// Parse a possible minus sign followed by a group of decimal digits as a
|
||||
// signed integer of type T.
|
||||
fn parse_signed<T>(&mut self) -> Result<T>
|
||||
where
|
||||
T: Neg<Output = T> + AddAssign<T> + MulAssign<T> + From<i8>,
|
||||
T: TryFrom<i64>,
|
||||
{
|
||||
// Optional minus sign, delegate to `parse_unsigned`, negate if negative.
|
||||
unimplemented!()
|
||||
let token = self.next_token().expect_token(&[Token::Integer])?;
|
||||
let span = token.span.clone();
|
||||
Ok(
|
||||
T::try_from(self.parser.parse_int_token(token)?).map_err(|_| {
|
||||
SpannedError::new(ParseError::InvalidIntLiteral(ParseIntError::Overflow), span)
|
||||
})?,
|
||||
)
|
||||
}
|
||||
|
||||
// Parse a string until the next '"' character.
|
||||
//
|
||||
// Makes no attempt to handle escape sequences. What did you expect? This is
|
||||
// example code!
|
||||
fn parse_string(&mut self) -> Result<&'de str> {
|
||||
if self.next_char()? != '"' {
|
||||
return Err(Error::ExpectedString);
|
||||
}
|
||||
match self.input.find('"') {
|
||||
Some(len) => {
|
||||
let s = &self.input[..len];
|
||||
self.input = &self.input[len + 1..];
|
||||
Ok(s)
|
||||
}
|
||||
None => Err(Error::Eof),
|
||||
fn parse_float(&mut self) -> Result<f64> {
|
||||
let token = self.next_token().expect_token(&[Token::Float])?;
|
||||
Ok(self.parser.parse_float_token(token)?)
|
||||
}
|
||||
|
||||
fn parse_string(&mut self) -> Result<String> {
|
||||
let token = self.next_token().expect_token(&[Token::LiteralString])?;
|
||||
Ok(self.parser.parse_string_token(token)?)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
|
||||
type Error = ParseError;
|
||||
type Error = SerdeParseError;
|
||||
|
||||
// Look at the input data to decide what Serde data model type to
|
||||
// deserialize as. Not all data formats are able to support this operation.
|
||||
// Formats that support `deserialize_any` are known as self-describing.
|
||||
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
{
|
||||
match self.peek_char()? {
|
||||
'n' => self.deserialize_unit(visitor),
|
||||
't' | 'f' => self.deserialize_bool(visitor),
|
||||
'"' => self.deserialize_str(visitor),
|
||||
'0'..='9' => self.deserialize_u64(visitor),
|
||||
'-' => self.deserialize_i64(visitor),
|
||||
'[' => self.deserialize_seq(visitor),
|
||||
'{' => self.deserialize_map(visitor),
|
||||
_ => Err(Error::Syntax),
|
||||
let peek = self.peek_token().expect_token(&[
|
||||
Token::Null,
|
||||
Token::Bool,
|
||||
Token::LiteralString,
|
||||
Token::Integer,
|
||||
Token::Float,
|
||||
Token::Array,
|
||||
Token::SquareOpen,
|
||||
])?;
|
||||
match peek.token {
|
||||
Token::Null => self.deserialize_unit(visitor),
|
||||
Token::Bool => self.deserialize_bool(visitor),
|
||||
Token::LiteralString => self.deserialize_string(visitor),
|
||||
Token::Integer => self.deserialize_i64(visitor),
|
||||
Token::Float => self.deserialize_f64(visitor),
|
||||
Token::Array | Token::SquareOpen => self.deserialize_seq(visitor),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -203,20 +253,18 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
|
|||
visitor.visit_u64(self.parse_unsigned()?)
|
||||
}
|
||||
|
||||
// Float parsing is stupidly hard.
|
||||
fn deserialize_f32<V>(self, _visitor: V) -> Result<V::Value>
|
||||
fn deserialize_f32<V>(self, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
{
|
||||
unimplemented!()
|
||||
visitor.visit_f32(self.parse_float()? as f32)
|
||||
}
|
||||
|
||||
// Float parsing is stupidly hard.
|
||||
fn deserialize_f64<V>(self, _visitor: V) -> Result<V::Value>
|
||||
fn deserialize_f64<V>(self, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
{
|
||||
unimplemented!()
|
||||
visitor.visit_f64(self.parse_float()?)
|
||||
}
|
||||
|
||||
// The `Serializer` implementation on the previous page serialized chars as
|
||||
|
|
@ -235,14 +283,15 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
|
|||
where
|
||||
V: Visitor<'de>,
|
||||
{
|
||||
visitor.visit_borrowed_str(self.parse_string()?)
|
||||
let str = self.parse_string()?;
|
||||
visitor.visit_str(str.as_str())
|
||||
}
|
||||
|
||||
fn deserialize_string<V>(self, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
{
|
||||
self.deserialize_str(visitor)
|
||||
visitor.visit_string(self.parse_string()?)
|
||||
}
|
||||
|
||||
// The `Serializer` implementation on the previous page serialized byte
|
||||
|
|
@ -251,14 +300,15 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
|
|||
where
|
||||
V: Visitor<'de>,
|
||||
{
|
||||
unimplemented!()
|
||||
todo!()
|
||||
}
|
||||
|
||||
fn deserialize_byte_buf<V>(self, _visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
{
|
||||
unimplemented!()
|
||||
todo!()
|
||||
// visitor.visit_string(self.parse_string()?.to_vec())
|
||||
}
|
||||
|
||||
// An absent optional is represented as the JSON `null` and a present
|
||||
|
|
@ -273,28 +323,31 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
|
|||
where
|
||||
V: Visitor<'de>,
|
||||
{
|
||||
if self.input.starts_with("null") {
|
||||
self.input = &self.input["null".len()..];
|
||||
let token = self.peek_token().expect_token(&[
|
||||
Token::Null,
|
||||
Token::Bool,
|
||||
Token::LiteralString,
|
||||
Token::Integer,
|
||||
Token::Float,
|
||||
Token::Array,
|
||||
Token::SquareOpen,
|
||||
])?;
|
||||
if token.token == Token::Null {
|
||||
let _ = self.next_token();
|
||||
visitor.visit_none()
|
||||
} else {
|
||||
visitor.visit_some(self)
|
||||
}
|
||||
}
|
||||
|
||||
// In Serde, unit means an anonymous value containing no data.
|
||||
fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
{
|
||||
if self.input.starts_with("null") {
|
||||
self.input = &self.input["null".len()..];
|
||||
self.next_token().expect_token(&[Token::Null])?;
|
||||
visitor.visit_unit()
|
||||
} else {
|
||||
Err(Error::ExpectedNull)
|
||||
}
|
||||
}
|
||||
|
||||
// Unit struct means a named value containing no data.
|
||||
fn deserialize_unit_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
|
|
@ -302,9 +355,6 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
|
|||
self.deserialize_unit(visitor)
|
||||
}
|
||||
|
||||
// As is done here, serializers are encouraged to treat newtype structs as
|
||||
// insignificant wrappers around the data they contain. That means not
|
||||
// parsing anything other than the contained value.
|
||||
fn deserialize_newtype_struct<V>(self, _name: &'static str, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
|
|
@ -312,34 +362,23 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
|
|||
visitor.visit_newtype_struct(self)
|
||||
}
|
||||
|
||||
// Deserialization of compound types like sequences and maps happens by
|
||||
// passing the visitor an "Access" object that gives it the ability to
|
||||
// iterate through the data contained in the sequence.
|
||||
fn deserialize_seq<V>(mut self, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
{
|
||||
// Parse the opening bracket of the sequence.
|
||||
if self.next_char()? == '[' {
|
||||
// Give the visitor access to each element of the sequence.
|
||||
let value = visitor.visit_seq(CommaSeparated::new(&mut self))?;
|
||||
// Parse the closing bracket of the sequence.
|
||||
if self.next_char()? == ']' {
|
||||
let token = self
|
||||
.next_token()
|
||||
.expect_token(&[Token::Array, Token::SquareOpen])?;
|
||||
let syntax = match token.token {
|
||||
Token::Array => ArraySyntax::Long,
|
||||
Token::SquareOpen => ArraySyntax::Short,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let value = visitor.visit_seq(ArrayWalker::new(&mut self, syntax))?;
|
||||
Ok(value)
|
||||
} else {
|
||||
Err(Error::ExpectedArrayEnd)
|
||||
}
|
||||
} else {
|
||||
Err(Error::ExpectedArray)
|
||||
}
|
||||
}
|
||||
|
||||
// Tuples look just like sequences in JSON. Some formats may be able to
|
||||
// represent tuples more efficiently.
|
||||
//
|
||||
// As indicated by the length parameter, the `Deserialize` implementation
|
||||
// for a tuple in the Serde data model is required to know the length of the
|
||||
// tuple before even looking at the input data.
|
||||
fn deserialize_tuple<V>(self, _len: usize, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
|
|
@ -347,7 +386,6 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
|
|||
self.deserialize_seq(visitor)
|
||||
}
|
||||
|
||||
// Tuple structs look just like sequences in JSON.
|
||||
fn deserialize_tuple_struct<V>(
|
||||
self,
|
||||
_name: &'static str,
|
||||
|
|
@ -360,34 +398,23 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
|
|||
self.deserialize_seq(visitor)
|
||||
}
|
||||
|
||||
// Much like `deserialize_seq` but calls the visitors `visit_map` method
|
||||
// with a `MapAccess` implementation, rather than the visitor's `visit_seq`
|
||||
// method with a `SeqAccess` implementation.
|
||||
fn deserialize_map<V>(mut self, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
{
|
||||
// Parse the opening brace of the map.
|
||||
if self.next_char()? == '{' {
|
||||
// Give the visitor access to each entry of the map.
|
||||
let value = visitor.visit_map(CommaSeparated::new(&mut self))?;
|
||||
// Parse the closing brace of the map.
|
||||
if self.next_char()? == '}' {
|
||||
let token = self
|
||||
.next_token()
|
||||
.expect_token(&[Token::Array, Token::SquareOpen])?;
|
||||
let syntax = match token.token {
|
||||
Token::Array => ArraySyntax::Long,
|
||||
Token::SquareOpen => ArraySyntax::Short,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let value = visitor.visit_map(ArrayWalker::new(&mut self, syntax))?;
|
||||
Ok(value)
|
||||
} else {
|
||||
Err(Error::ExpectedMapEnd)
|
||||
}
|
||||
} else {
|
||||
Err(Error::ExpectedMap)
|
||||
}
|
||||
}
|
||||
|
||||
// Structs look just like maps in JSON.
|
||||
//
|
||||
// Notice the `fields` parameter - a "struct" in the Serde data model means
|
||||
// that the `Deserialize` implementation is required to know what the fields
|
||||
// are before even looking at the input data. Any key-value pairing in which
|
||||
// the fields cannot be known ahead of time is probably a map.
|
||||
fn deserialize_struct<V>(
|
||||
self,
|
||||
_name: &'static str,
|
||||
|
|
@ -409,27 +436,31 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
|
|||
where
|
||||
V: Visitor<'de>,
|
||||
{
|
||||
if self.peek_char()? == '"' {
|
||||
// Visit a unit variant.
|
||||
visitor.visit_enum(self.parse_string()?.into_deserializer())
|
||||
} else if self.next_char()? == '{' {
|
||||
// Visit a newtype variant, tuple variant, or struct variant.
|
||||
// panic!("a");
|
||||
let token = self.peek_token().expect_token(&[
|
||||
Token::LiteralString,
|
||||
Token::Array,
|
||||
Token::SquareOpen,
|
||||
])?;
|
||||
match token.token {
|
||||
Token::LiteralString => visitor.visit_enum(self.parse_string()?.into_deserializer()),
|
||||
Token::Array | Token::SquareOpen => {
|
||||
self.eat_token();
|
||||
let syntax = if token.token == Token::Array {
|
||||
self.next_token().expect_token(&[Token::BracketOpen])?;
|
||||
ArraySyntax::Long
|
||||
} else {
|
||||
ArraySyntax::Short
|
||||
};
|
||||
|
||||
let value = visitor.visit_enum(Enum::new(self))?;
|
||||
// Parse the matching close brace.
|
||||
if self.next_char()? == '}' {
|
||||
self.next_token().expect_token(&[syntax.close_bracket()])?;
|
||||
Ok(value)
|
||||
} else {
|
||||
Err(Error::ExpectedMapEnd)
|
||||
}
|
||||
} else {
|
||||
Err(Error::ExpectedEnum)
|
||||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
// An identifier in Serde is the type that identifies a field of a struct or
|
||||
// the variant of an enum. In JSON, struct fields and enum variants are
|
||||
// represented as strings. In other formats they may be represented as
|
||||
// numeric indices.
|
||||
fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
|
|
@ -437,17 +468,6 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
|
|||
self.deserialize_str(visitor)
|
||||
}
|
||||
|
||||
// Like `deserialize_any` but indicates to the `Deserializer` that it makes
|
||||
// no difference which `Visitor` method is called because the data is
|
||||
// ignored.
|
||||
//
|
||||
// Some deserializers are able to implement this more efficiently than
|
||||
// `deserialize_any`, for example by rapidly skipping over matched
|
||||
// delimiters without paying close attention to the data in between.
|
||||
//
|
||||
// Some formats are not able to implement this at all. Formats that can
|
||||
// implement `deserialize_any` and `deserialize_ignored_any` are known as
|
||||
// self-describing.
|
||||
fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
|
|
@ -456,62 +476,116 @@ impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
|
|||
}
|
||||
}
|
||||
|
||||
// In order to handle commas correctly when deserializing a JSON array or map,
|
||||
// we need to track whether we are on the first element or past the first
|
||||
// element.
|
||||
struct CommaSeparated<'a, 'de: 'a> {
|
||||
de: &'a mut Deserializer<'de>,
|
||||
first: bool,
|
||||
struct ArrayWalker<'source, 'a> {
|
||||
de: &'a mut Deserializer<'source>,
|
||||
next_int_key: i64,
|
||||
syntax: ArraySyntax,
|
||||
done: bool,
|
||||
}
|
||||
|
||||
impl<'a, 'de> CommaSeparated<'a, 'de> {
|
||||
fn new(de: &'a mut Deserializer<'de>) -> Self {
|
||||
CommaSeparated { de, first: true }
|
||||
impl<'source, 'a> ArrayWalker<'source, 'a> {
|
||||
pub fn new(de: &'a mut Deserializer<'source>, syntax: ArraySyntax) -> Self {
|
||||
ArrayWalker {
|
||||
de,
|
||||
next_int_key: 0,
|
||||
syntax,
|
||||
done: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// `SeqAccess` is provided to the `Visitor` to give it the ability to iterate
|
||||
// through elements of the sequence.
|
||||
impl<'de, 'a> SeqAccess<'de> for CommaSeparated<'a, 'de> {
|
||||
type Error = Error;
|
||||
impl<'de, 'a> SeqAccess<'de> for ArrayWalker<'de, 'a> {
|
||||
type Error = SerdeParseError;
|
||||
|
||||
fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
|
||||
where
|
||||
T: DeserializeSeed<'de>,
|
||||
{
|
||||
// Check if there are no more elements.
|
||||
if self.de.peek_char()? == ']' {
|
||||
if self.done {
|
||||
return Ok(None);
|
||||
}
|
||||
// Comma is required before every element except the first.
|
||||
if !self.first && self.de.next_char()? != ',' {
|
||||
return Err(Error::ExpectedArrayComma);
|
||||
|
||||
let token = self.de.next_token().expect_token(&[
|
||||
Token::Bool,
|
||||
Token::Integer,
|
||||
Token::Float,
|
||||
Token::LiteralString,
|
||||
Token::Null,
|
||||
Token::Array,
|
||||
Token::SquareOpen,
|
||||
])?;
|
||||
|
||||
let next = self.de.next_token().expect_token(&[
|
||||
self.syntax.close_bracket(),
|
||||
Token::Comma,
|
||||
Token::Arrow,
|
||||
])?;
|
||||
|
||||
let value_token = match next.token.clone() {
|
||||
Token::Comma => token,
|
||||
Token::Arrow => {
|
||||
let span = token.span.clone();
|
||||
let key = self.de.parser.parse_array_key(token)?;
|
||||
match key {
|
||||
Key::Int(key) if key == self.next_int_key => Ok(()),
|
||||
_ => Err(ParseError::UnexpectedArrayKey).with_span(span),
|
||||
}?;
|
||||
self.next_int_key += 1;
|
||||
let value = self.de.next_token().expect_token(&[
|
||||
Token::Bool,
|
||||
Token::Integer,
|
||||
Token::Float,
|
||||
Token::LiteralString,
|
||||
Token::Null,
|
||||
Token::Array,
|
||||
Token::SquareOpen,
|
||||
])?;
|
||||
let next = self
|
||||
.de
|
||||
.next_token()
|
||||
.expect_token(&[Token::Comma, self.syntax.close_bracket()])?;
|
||||
if next.token == self.syntax.close_bracket() {
|
||||
self.done = true;
|
||||
}
|
||||
self.first = false;
|
||||
value
|
||||
}
|
||||
peeked_token if peeked_token == self.syntax.close_bracket() => {
|
||||
self.done = true;
|
||||
token
|
||||
}
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
dbg!(value_token.slice());
|
||||
// Deserialize an array element.
|
||||
self.de.set_peeked(value_token);
|
||||
seed.deserialize(&mut *self.de).map(Some)
|
||||
}
|
||||
}
|
||||
|
||||
// `MapAccess` is provided to the `Visitor` to give it the ability to iterate
|
||||
// through entries of the map.
|
||||
impl<'de, 'a> MapAccess<'de> for CommaSeparated<'a, 'de> {
|
||||
type Error = Error;
|
||||
impl<'de, 'a> MapAccess<'de> for ArrayWalker<'de, 'a> {
|
||||
type Error = SerdeParseError;
|
||||
|
||||
fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
|
||||
where
|
||||
K: DeserializeSeed<'de>,
|
||||
{
|
||||
// Check if there are no more entries.
|
||||
if self.de.peek_char()? == '}' {
|
||||
if self.done {
|
||||
return Ok(None);
|
||||
}
|
||||
// Comma is required before every entry except the first.
|
||||
if !self.first && self.de.next_char()? != ',' {
|
||||
return Err(Error::ExpectedMapComma);
|
||||
}
|
||||
self.first = false;
|
||||
|
||||
let token = self.de.next_token().expect_token(&[
|
||||
Token::Bool,
|
||||
Token::Integer,
|
||||
Token::Float,
|
||||
Token::LiteralString,
|
||||
Token::Null,
|
||||
])?;
|
||||
|
||||
self.de.next_token().expect_token(&[Token::Arrow])?;
|
||||
|
||||
// Deserialize a map key.
|
||||
self.de.set_peeked(token);
|
||||
seed.deserialize(&mut *self.de).map(Some)
|
||||
}
|
||||
|
||||
|
|
@ -519,14 +593,29 @@ impl<'de, 'a> MapAccess<'de> for CommaSeparated<'a, 'de> {
|
|||
where
|
||||
V: DeserializeSeed<'de>,
|
||||
{
|
||||
// It doesn't make a difference whether the colon is parsed at the end
|
||||
// of `next_key_seed` or at the beginning of `next_value_seed`. In this
|
||||
// case the code is a bit simpler having it here.
|
||||
if self.de.next_char()? != ':' {
|
||||
return Err(Error::ExpectedMapColon);
|
||||
let token = self.de.next_token().expect_token(&[
|
||||
Token::Bool,
|
||||
Token::Integer,
|
||||
Token::Float,
|
||||
Token::LiteralString,
|
||||
Token::Null,
|
||||
Token::Array,
|
||||
Token::SquareOpen,
|
||||
])?;
|
||||
|
||||
// Deserialize a map key.
|
||||
self.de.set_peeked(token);
|
||||
let value = seed.deserialize(&mut *self.de)?;
|
||||
|
||||
let next = self
|
||||
.de
|
||||
.next_token()
|
||||
.expect_token(&[Token::Comma, self.syntax.close_bracket()])?;
|
||||
|
||||
if next.token == self.syntax.close_bracket() {
|
||||
self.done = true;
|
||||
}
|
||||
// Deserialize a map value.
|
||||
seed.deserialize(&mut *self.de)
|
||||
Ok(value)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -546,39 +635,29 @@ impl<'a, 'de> Enum<'a, 'de> {
|
|||
// Note that all enum deserialization methods in Serde refer exclusively to the
|
||||
// "externally tagged" enum representation.
|
||||
impl<'de, 'a> EnumAccess<'de> for Enum<'a, 'de> {
|
||||
type Error = Error;
|
||||
type Error = SerdeParseError;
|
||||
type Variant = Self;
|
||||
|
||||
fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant)>
|
||||
where
|
||||
V: DeserializeSeed<'de>,
|
||||
{
|
||||
// The `deserialize_enum` method parsed a `{` character so we are
|
||||
// currently inside of a map. The seed will be deserializing itself from
|
||||
// the key of the map.
|
||||
let val = seed.deserialize(&mut *self.de)?;
|
||||
// Parse the colon separating map key from value.
|
||||
if self.de.next_char()? == ':' {
|
||||
self.de.next_token().expect_token(&[Token::Arrow])?;
|
||||
Ok((val, self))
|
||||
} else {
|
||||
Err(Error::ExpectedMapColon)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// `VariantAccess` is provided to the `Visitor` to give it the ability to see
|
||||
// the content of the single variant that it decided to deserialize.
|
||||
impl<'de, 'a> VariantAccess<'de> for Enum<'a, 'de> {
|
||||
type Error = Error;
|
||||
type Error = SerdeParseError;
|
||||
|
||||
// If the `Visitor` expected this variant to be a unit variant, the input
|
||||
// should have been the plain string case handled in `deserialize_enum`.
|
||||
fn unit_variant(self) -> Result<()> {
|
||||
Err(Error::ExpectedString)
|
||||
self.de.next_token().expect_token(&[Token::LiteralString])?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Newtype variants are represented in JSON as `{ NAME: VALUE }` so
|
||||
// deserialize the value here.
|
||||
fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value>
|
||||
where
|
||||
T: DeserializeSeed<'de>,
|
||||
|
|
@ -586,8 +665,6 @@ impl<'de, 'a> VariantAccess<'de> for Enum<'a, 'de> {
|
|||
seed.deserialize(self.de)
|
||||
}
|
||||
|
||||
// Tuple variants are represented in JSON as `{ NAME: [DATA...] }` so
|
||||
// deserialize the sequence of data here.
|
||||
fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
|
|
@ -595,8 +672,6 @@ impl<'de, 'a> VariantAccess<'de> for Enum<'a, 'de> {
|
|||
de::Deserializer::deserialize_seq(self.de, visitor)
|
||||
}
|
||||
|
||||
// Struct variants are represented in JSON as `{ NAME: { K: V, ... } }` so
|
||||
// deserialize the inner map here.
|
||||
fn struct_variant<V>(self, _fields: &'static [&'static str], visitor: V) -> Result<V::Value>
|
||||
where
|
||||
V: Visitor<'de>,
|
||||
|
|
@ -607,6 +682,42 @@ impl<'de, 'a> VariantAccess<'de> for Enum<'a, 'de> {
|
|||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use serde_derive::Deserialize;
|
||||
|
||||
fn from_str<'a, T>(source: &'a str) -> super::Result<T>
|
||||
where
|
||||
T: serde::Deserialize<'a>,
|
||||
{
|
||||
match super::from_str(source) {
|
||||
Ok(res) => Ok(res),
|
||||
Err(super::SerdeParseError::Spanned(err)) => {
|
||||
let with_source = err.with_source(source);
|
||||
eprintln!("{}", with_source);
|
||||
Err(super::SerdeParseError::Spanned(with_source.spanned()))
|
||||
}
|
||||
Err(err) => {
|
||||
eprintln!("{}", err);
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_vec() {
|
||||
let j = r#"["a","b"]"#;
|
||||
let expected: Vec<String> = vec!["a".to_string(), "b".to_string()];
|
||||
assert_eq!(expected, from_str::<Vec<String>>(j).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_vec_explicit_keys() {
|
||||
let j = r#"[0=>"a", 1=>"b"]"#;
|
||||
let expected: Vec<String> = vec!["a".to_string(), "b".to_string()];
|
||||
assert_eq!(expected, from_str::<Vec<String>>(j).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_struct() {
|
||||
#[derive(Deserialize, PartialEq, Debug)]
|
||||
|
|
@ -615,7 +726,7 @@ fn test_struct() {
|
|||
seq: Vec<String>,
|
||||
}
|
||||
|
||||
let j = r#"{"int":1,"seq":["a","b"]}"#;
|
||||
let j = r#"["int"=>1,"seq"=>["a","b"]]"#;
|
||||
let expected = Test {
|
||||
int: 1,
|
||||
seq: vec!["a".to_owned(), "b".to_owned()],
|
||||
|
|
@ -623,12 +734,35 @@ fn test_struct() {
|
|||
assert_eq!(expected, from_str(j).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_struct_nested() {
|
||||
#[derive(Deserialize, PartialEq, Debug)]
|
||||
struct Inner {
|
||||
a: f32,
|
||||
b: bool,
|
||||
}
|
||||
|
||||
#[derive(Deserialize, PartialEq, Debug)]
|
||||
struct Test {
|
||||
int: u32,
|
||||
nested: Inner,
|
||||
}
|
||||
|
||||
let j = r#"["int"=>1,"nested"=>["a" => 1.0, "b" => false]]"#;
|
||||
let expected = Test {
|
||||
int: 1,
|
||||
nested: Inner { a: 1.0, b: false },
|
||||
};
|
||||
assert_eq!(expected, from_str(j).unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_enum() {
|
||||
#[derive(Deserialize, PartialEq, Debug)]
|
||||
enum E {
|
||||
Unit,
|
||||
Newtype(u32),
|
||||
Newtype1(u32),
|
||||
Newtype2(u32),
|
||||
Tuple(u32, u32),
|
||||
Struct { a: u32 },
|
||||
}
|
||||
|
|
@ -637,15 +771,20 @@ fn test_enum() {
|
|||
let expected = E::Unit;
|
||||
assert_eq!(expected, from_str(j).unwrap());
|
||||
|
||||
let j = r#"{"Newtype":1}"#;
|
||||
let expected = E::Newtype(1);
|
||||
let j = r#"["Newtype1"=>1]"#;
|
||||
let expected = E::Newtype1(1);
|
||||
assert_eq!(expected, from_str(j).unwrap());
|
||||
|
||||
let j = r#"{"Tuple":[1,2]}"#;
|
||||
let j = r#"["Newtype2"=>1]"#;
|
||||
let expected = E::Newtype2(1);
|
||||
assert_eq!(expected, from_str(j).unwrap());
|
||||
|
||||
let j = r#"["Tuple"=>[1,2]]"#;
|
||||
let expected = E::Tuple(1, 2);
|
||||
assert_eq!(expected, from_str(j).unwrap());
|
||||
|
||||
let j = r#"{"Struct":{"a":1}}"#;
|
||||
let j = r#"["Struct"=>["a"=>1]]"#;
|
||||
let expected = E::Struct { a: 1 };
|
||||
assert_eq!(expected, from_str(j).unwrap());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue