This commit is contained in:
Robin Appelman 2023-12-15 15:51:53 +01:00
commit f492a63ab1
19 changed files with 1885 additions and 0 deletions

33
src/entry/array.rs Normal file
View file

@ -0,0 +1,33 @@
use super::Entry;
use serde::Serialize;
use std::ops::{Deref, DerefMut};
/// An array of entries (items that have the same key).
///
/// Thin wrapper around a `Vec<Entry>`.
#[derive(Clone, PartialEq, Eq, Debug, Serialize)]
pub struct Array(Vec<Entry>);
impl From<Entry> for Array {
fn from(value: Entry) -> Self {
Array(vec![value])
}
}
/// Wraps an [`Array`] in [`Entry::Array`].
///
/// Implemented as `From` rather than `Into`: the standard blanket impl still
/// provides `Into<Entry> for Array`, so existing `.into()` calls keep working,
/// and `Entry::from(array)` becomes available as well.
impl From<Array> for Entry {
    fn from(value: Array) -> Self {
        Entry::Array(value)
    }
}
impl Deref for Array {
type Target = Vec<Entry>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for Array {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

155
src/entry/mod.rs Normal file
View file

@ -0,0 +1,155 @@
use std::slice;
/// The kinds of entry.
///
/// Every node of a loaded document is one of these four variants; each variant
/// wraps the dedicated type of the same name.
#[derive(Clone, PartialEq, Eq, Debug, Serialize)]
pub enum Entry {
/// A table.
Table(Table),
/// An array (entries with the same key).
Array(Array),
/// A statement (the values starting with #).
Statement(Statement),
/// A value.
Value(Value),
}
impl Entry {
    /// Looks up a nested entry by a dot-separated path.
    ///
    /// Every segment is trimmed of surrounding whitespace and resolved with
    /// [`Entry::get`], starting from `self`. Returns `None` as soon as one
    /// segment cannot be resolved.
    pub fn lookup<S: AsRef<str>>(&self, path: S) -> Option<&Entry> {
        path.as_ref()
            .split('.')
            .try_fold(self, |current, name| current.get(name.trim()))
    }

    /// Tries to get the named child entry.
    ///
    /// For a table `name` is used as the key; for an array `name` is parsed as
    /// a zero-based index. Statements and values have no children, so `None`
    /// is returned for them.
    pub fn get<S: AsRef<str>>(&self, name: S) -> Option<&Entry> {
        match self {
            Entry::Table(table) => table.get(name.as_ref()),
            Entry::Array(array) => name
                .as_ref()
                .parse::<usize>()
                .ok()
                .and_then(|index| array.get(index)),
            Entry::Statement(_) | Entry::Value(_) => None,
        }
    }

    /// Tries to convert the entry to the given type.
    ///
    /// Only [`Entry::Value`] can be converted; every other variant yields `None`.
    pub fn to<T: Parse>(&self) -> Option<T> {
        match self {
            Entry::Value(value) => value.to::<T>(),
            _ => None,
        }
    }

    /// Tries to take the entry as a table.
    pub fn as_table(&self) -> Option<&Table> {
        match self {
            Entry::Table(table) => Some(table),
            _ => None,
        }
    }

    /// Takes the entry as a slice of entries.
    ///
    /// An array yields its elements; any other entry yields a one-element
    /// slice containing the entry itself, so this never returns `None`.
    pub fn as_slice(&self) -> Option<&[Entry]> {
        match self {
            Entry::Array(array) => Some(array.as_slice()),
            // `slice::from_ref` is the safe equivalent of `from_raw_parts(self, 1)`.
            other => Some(slice::from_ref(other)),
        }
    }

    /// Tries to take the entry as a statement.
    pub fn as_statement(&self) -> Option<&Statement> {
        match self {
            Entry::Statement(statement) => Some(statement),
            _ => None,
        }
    }

    /// Tries to take the entry as a value.
    pub fn as_value(&self) -> Option<&Value> {
        match self {
            Entry::Value(value) => Some(value),
            _ => None,
        }
    }

    /// Tries to take the entry as a string.
    ///
    /// Both values and statements expose their text; tables and arrays yield `None`.
    pub fn as_str(&self) -> Option<&str> {
        match self {
            Entry::Value(value) => Some(&**value),
            Entry::Statement(statement) => Some(&**statement),
            _ => None,
        }
    }
}
/// Parsable types.
///
/// Implemented by types that can be built from the textual content of a value.
pub trait Parse: Sized {
/// Try to parse the string, returning `None` when it is not a valid `Self`.
fn parse(string: &str) -> Option<Self>;
}
/// Implements `Parse` for types by delegating to `str::parse`.
///
/// `from_str!(T)` implements it for one type; `from_str!(for A B C)` for a
/// whitespace-separated list of types (an empty list expands to nothing).
macro_rules! from_str {
    (for $($ty:ident)*) => (
        $( from_str!($ty); )*
    );

    ($ty:ident) => (
        impl Parse for $ty {
            fn parse(string: &str) -> Option<Self> {
                string.parse::<$ty>().ok()
            }
        }
    );
}
use serde::Serialize;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV4, SocketAddrV6};
// Implement `Parse` (through `str::parse`) for the standard network address
// types and the primitive numeric types.
from_str!(for IpAddr Ipv4Addr Ipv6Addr SocketAddr SocketAddrV4 SocketAddrV6);
from_str!(for i8 i16 i32 i64 isize u8 u16 u32 u64 usize f32 f64);
impl Parse for bool {
    /// Accepts `"1"`/`"true"` as `true` and `"0"`/`"false"` as `false`;
    /// anything else is rejected.
    fn parse(string: &str) -> Option<Self> {
        match string {
            "1" | "true" => Some(true),
            "0" | "false" => Some(false),
            _ => None,
        }
    }
}
mod table;
pub use table::Table;
mod array;
pub use array::Array;
mod statement;
pub use statement::Statement;
mod value;
pub use value::Value;

28
src/entry/statement.rs Normal file
View file

@ -0,0 +1,28 @@
use super::Entry;
use serde::Serialize;
use std::borrow::Cow;
use std::ops::Deref;
/// A statement.
///
/// Wraps the statement text as an owned `String`.
#[derive(Clone, PartialEq, Eq, Debug, Serialize)]
pub struct Statement(String);
impl From<Cow<'_, str>> for Statement {
fn from(value: Cow<'_, str>) -> Self {
Statement(value.into())
}
}
/// Wraps a [`Statement`] in [`Entry::Statement`].
///
/// Implemented as `From` rather than `Into`: the standard blanket impl still
/// provides `Into<Entry> for Statement`, so existing `.into()` calls keep
/// working, and `Entry::from(statement)` becomes available as well.
impl From<Statement> for Entry {
    fn from(value: Statement) -> Self {
        Entry::Statement(value)
    }
}
impl Deref for Statement {
    type Target = str;

    /// Borrows the statement text.
    fn deref(&self) -> &str {
        self.0.as_str()
    }
}

81
src/entry/table.rs Normal file
View file

@ -0,0 +1,81 @@
use super::{Array, Entry, Statement, Value};
use crate::error::StatementInTableError;
use crate::{Event, Item, Reader, Result};
use serde::Serialize;
use std::collections::HashMap;
use std::ops::Deref;
/// A table of entries.
///
/// Wraps a `HashMap` from key to [`Entry`].
#[derive(Clone, PartialEq, Eq, Debug, Serialize)]
pub struct Table(HashMap<String, Entry>);
/// Inserts `value` under `key`, collecting repeated keys into an array.
///
/// A key seen for the first time stores the entry as-is. If the key already
/// holds an array the value is appended to it; otherwise the existing entry
/// is replaced by an array holding the old entry followed by the new one.
fn insert(map: &mut HashMap<String, Entry>, key: String, value: Entry) {
    match map.get_mut(&key) {
        // First occurrence of the key.
        None => {
            map.insert(key, value);
        }
        // Already an array: append in place.
        Some(Entry::Array(array)) => array.push(value),
        // Second occurrence: promote the existing entry to an array.
        Some(_) => {
            let first = map
                .remove(&key)
                .expect("key was found by `get_mut` just above");
            let mut array = Array::from(first);
            array.push(value);
            map.insert(key, array.into());
        }
    }
}
impl Table {
    /// Loads a table from the given `Reader`.
    ///
    /// Events are consumed until a group end or the end of the input is
    /// reached. Entries are stored under their key, nested groups are loaded
    /// recursively, and repeated keys are collected into arrays.
    ///
    /// # Errors
    ///
    /// Propagates any error from the reader, and fails with a
    /// `StatementInTableError` when an entry uses a statement as its key.
    pub fn load(reader: &mut Reader) -> Result<Table> {
        let mut entries = HashMap::new();

        loop {
            match reader.event()? {
                // A statement is not allowed in the key position of a table entry.
                Event::Entry {
                    key: Item::Statement { .. },
                    span,
                    ..
                } => {
                    return Err(
                        StatementInTableError::new(span.into(), reader.content.into()).into(),
                    );
                }

                Event::Entry {
                    key: Item::Value { content: key, .. },
                    value: Item::Statement { content: value, .. },
                    ..
                } => insert(&mut entries, key.into(), Statement::from(value).into()),

                Event::Entry {
                    key: Item::Value { content: key, .. },
                    value: Item::Value { content: value, .. },
                    ..
                } => insert(&mut entries, key.into(), Value::from(value).into()),

                // A nested group is loaded recursively as its own table.
                Event::GroupStart { name, .. } => {
                    insert(&mut entries, name.into(), Table::load(reader)?.into())
                }

                Event::GroupEnd { .. } | Event::End { .. } => break,
            }
        }

        Ok(Table(entries))
    }
}
/// Wraps a [`Table`] in [`Entry::Table`].
///
/// Implemented as `From` rather than `Into`: the standard blanket impl still
/// provides `Into<Entry> for Table`, so existing `.into()` calls keep working,
/// and `Entry::from(table)` becomes available as well.
impl From<Table> for Entry {
    fn from(value: Table) -> Self {
        Entry::Table(value)
    }
}
impl Deref for Table {
type Target = HashMap<String, Entry>;
fn deref(&self) -> &Self::Target {
&self.0
}
}

34
src/entry/value.rs Normal file
View file

@ -0,0 +1,34 @@
use super::{Entry, Parse};
use serde::Serialize;
use std::borrow::Cow;
use std::ops::Deref;
/// A value.
///
/// Wraps the value text as an owned `String`.
#[derive(Clone, PartialEq, Eq, Debug, Serialize)]
pub struct Value(String);
impl From<Cow<'_, str>> for Value {
fn from(value: Cow<'_, str>) -> Value {
Value(value.into())
}
}
/// Wraps a [`Value`] in [`Entry::Value`].
///
/// Implemented as `From` rather than `Into`: the standard blanket impl still
/// provides `Into<Entry> for Value`, so existing `.into()` calls keep working,
/// and `Entry::from(value)` becomes available as well.
impl From<Value> for Entry {
    fn from(value: Value) -> Self {
        Entry::Value(value)
    }
}
impl Deref for Value {
    type Target = str;

    /// Borrows the value text.
    fn deref(&self) -> &str {
        self.0.as_str()
    }
}
impl Value {
    /// Tries to convert the value's text to the given type.
    ///
    /// Returns whatever `T::parse` returns for the text.
    pub fn to<T: Parse>(&self) -> Option<T> {
        let text = self.0.as_str();
        <T as Parse>::parse(text)
    }
}

129
src/error.rs Normal file
View file

@ -0,0 +1,129 @@
use crate::Token;
use miette::{Diagnostic, SourceSpan};
use std::error::Error;
use std::fmt::{Display, Formatter};
use thiserror::Error;
/// Any error that occurred while trying to parse the vdf file.
///
/// Every variant transparently wraps a dedicated error type, forwarding both
/// its message and its diagnostic information.
#[derive(Error, Debug, Clone, Diagnostic)]
pub enum VdfError {
#[error(transparent)]
#[diagnostic(transparent)]
/// A token that wasn't expected was found while parsing
UnexpectedToken(#[from] UnexpectedTokenError),
#[error(transparent)]
#[diagnostic(transparent)]
/// No valid token found
NoValidToken(#[from] NoValidTokenError),
#[error(transparent)]
#[diagnostic(transparent)]
/// An unexpected statement was found inside a table
StatementInTable(#[from] StatementInTableError),
}
/// Display helper that renders a list of expected tokens separated by `", "`.
struct ExpectedTokens<'a>(&'a [Token]);
impl Display for ExpectedTokens<'_> {
    /// Writes the tokens separated by `", "`; an empty list writes nothing.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        for (index, token) in self.0.iter().enumerate() {
            if index > 0 {
                f.write_str(", ")?;
            }
            write!(f, "{}", token)?;
        }
        Ok(())
    }
}
/// A token that wasn't expected was found while parsing
// NOTE(review): the diagnostic code below uses the `php_literal_parser` namespace, which looks
// copied from another crate — confirm the intended code for this parser.
#[derive(Debug, Clone, Diagnostic)]
#[diagnostic(code(php_literal_parser::unexpected_token))]
pub struct UnexpectedTokenError {
// Location of the offending token, labelled with the list of expected tokens.
#[label("Expected {}", ExpectedTokens(self.expected))]
err_span: SourceSpan,
/// The tokens that would have been accepted at this position.
pub expected: &'static [Token],
/// The token that was actually found, or `None` when the input ended.
pub found: Option<Token>,
// Source text the span refers to.
#[source_code]
src: String,
}
impl UnexpectedTokenError {
pub fn new(expected: &'static [Token], found: Option<Token>, err_span: SourceSpan, src: String) -> Self {
UnexpectedTokenError {
err_span,
expected,
found,
src
}
}
}
impl Display for UnexpectedTokenError {
    /// Describes the unexpected token, or the unexpected end of input when no
    /// token was found, followed by the list of expected tokens.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let expected = ExpectedTokens(self.expected);
        if let Some(token) = &self.found {
            write!(
                f,
                "Unexpected token, found {} expected one of {}",
                token, expected
            )
        } else {
            write!(f, "Unexpected end of input expected one of {}", expected)
        }
    }
}
// Marker impl: the defaults are sufficient, the message comes from `Display`.
impl Error for UnexpectedTokenError {}
/// No valid token was found at the current position while parsing
// NOTE(review): the diagnostic code below is `php_literal_parser::unexpected_token`, the same as
// `UnexpectedTokenError` and in another crate's namespace — confirm the intended code.
#[derive(Debug, Clone, Diagnostic)]
#[diagnostic(code(php_literal_parser::unexpected_token))]
pub struct NoValidTokenError {
// Location of the invalid input, labelled with the list of expected tokens.
#[label("Expected {}", ExpectedTokens(self.expected))]
err_span: SourceSpan,
/// The tokens that would have been accepted at this position.
pub expected: &'static [Token],
// Source text the span refers to.
#[source_code]
src: String,
}
impl NoValidTokenError {
pub fn new(expected: &'static [Token], err_span: SourceSpan, src: String) -> Self {
NoValidTokenError { err_span, expected ,src }
}
}
impl Display for NoValidTokenError {
    /// Writes the fixed message followed by the list of expected tokens.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let expected = ExpectedTokens(self.expected);
        write!(f, "No valid token found, expected one of {}", expected)
    }
}
// Marker impl: the defaults are sufficient, the message comes from `Display`.
impl Error for NoValidTokenError {}
/// An unexpected statement was found inside a table
// NOTE(review): the diagnostic code below is `php_literal_parser::unexpected_token`, shared with
// the token errors and in another crate's namespace — confirm the intended code.
#[derive(Debug, Clone, Diagnostic, Error)]
#[diagnostic(code(php_literal_parser::unexpected_token))]
#[error("An unexpected statement was found inside a table")]
pub struct StatementInTableError {
// Location of the offending statement.
#[label("Unexpected statement")]
err_span: SourceSpan,
// Source text the span refers to.
#[source_code]
src: String,
}
impl StatementInTableError {
pub fn new(err_span: SourceSpan, src: String) -> Self {
StatementInTableError { err_span, src }
}
}

10
src/lib.rs Normal file
View file

@ -0,0 +1,10 @@
pub mod entry;
mod error;
mod parser;
mod reader;
pub use error::VdfError;
/// Result type used throughout the crate; the error type defaults to [`VdfError`].
pub type Result<T, E = VdfError> = std::result::Result<T, E>;
pub use parser::Token;
pub use reader::{Event, Item, Reader};

102
src/parser.rs Normal file
View file

@ -0,0 +1,102 @@
use logos::Logos;
use parse_display::Display;
use std::str;
/// Parser token.
///
/// Whitespace and `//` line comments are skipped by the lexer. Items and
/// statements may be quoted (with `\`-escapes inside the quotes) or bare; a
/// statement is distinguished from an item by its leading `#`.
///
/// Bare tokens end at a space, tab, quote, line feed or carriage return, so
/// input with CRLF line endings does not leave a trailing `\r` in a bare token.
#[derive(PartialEq, Debug, Logos, Display, Clone)]
#[logos(skip r"[ \t\n\f\r]+")] // whitespace
#[logos(skip r"//[^\n]*")] // comments
pub enum Token {
    /// A group is starting.
    #[token("{")]
    #[display("start of group")]
    GroupStart,
    /// A group is ending.
    #[token("}")]
    #[display("end of group")]
    GroupEnd,
    /// An enclosed or bare item.
    ///
    /// A bare item may not start with `#`, `{`, `}` or a quote, but may
    /// contain braces after its first character.
    #[regex("(\"([^\"\\\\]|\\\\.)*\")|([^# \t\r\n{}\"][^ \"\t\r\n]*)", priority = 0)]
    #[display("item")]
    Item,
    /// An enclosed or bare statement.
    #[regex("(\"#([^\"\\\\]|\\\\.)*\")|(#[^ \"\t\r\n]+)")]
    #[display("statement")]
    Statement,
}
// Lexer tests: token classification and tokenization of a small document.
#[cfg(test)]
mod tests {
use super::Token;
use logos::Logos;
// Lexes `input` and returns only its first token (`None` if there is none).
fn get_token(input: &str) -> Option<Result<Token, <Token as Logos>::Error>> {
let mut lex = Token::lexer(input);
lex.next()
}
// Lexes all of `input`, pairing every token with the source text it covers;
// stops at the first lexer error.
fn get_tokens(input: &str) -> Result<Vec<(Token, &str)>, <Token as Logos>::Error> {
Token::lexer(input)
.spanned()
.map(|(res, span)| res.map(|token| (token, &input[span])))
// .map(|res| dbg!(res))
.collect()
}
// Checks how the first token of various inputs is classified.
#[test]
fn next() {
assert_eq!(get_token("test"), Some(Ok(Token::Item)));
assert_eq!(get_token("\"test\""), Some(Ok(Token::Item)));
assert_eq!(get_token("\"\""), Some(Ok(Token::Item)));
assert_eq!(get_token("\"\" "), Some(Ok(Token::Item)));
assert_eq!(get_token("#test"), Some(Ok(Token::Statement)));
assert_eq!(get_token("\"#test\""), Some(Ok(Token::Statement)));
assert_eq!(get_token("{"), Some(Ok(Token::GroupStart)));
assert_eq!(get_token("}"), Some(Ok(Token::GroupEnd)));
// A comment-only input produces no token at all.
assert_eq!(get_token("//test more"), None);
assert_eq!(get_token("test"), Some(Ok(Token::Item)));
assert_eq!(get_token("#test"), Some(Ok(Token::Statement)));
assert_eq!(get_token("lol wut"), Some(Ok(Token::Item)));
assert_eq!(get_token("#lol wut"), Some(Ok(Token::Statement)));
// Braces after the first character are part of a bare token.
assert_eq!(get_token("lol{"), Some(Ok(Token::Item)));
assert_eq!(get_token("#lol{"), Some(Ok(Token::Statement)));
assert_eq!(get_token("lol}"), Some(Ok(Token::Item)));
assert_eq!(get_token("#lol}"), Some(Ok(Token::Statement)));
assert_eq!(get_token("\"test\""), Some(Ok(Token::Item)));
assert_eq!(get_token("\"#test\""), Some(Ok(Token::Statement)));
// Escaped quotes and backslashes inside quoted tokens.
assert_eq!(get_token("\"te\\\"st\""), Some(Ok(Token::Item)));
assert_eq!(get_token("\"te\\st\""), Some(Ok(Token::Item)));
assert_eq!(get_token("\"#te\\\"st\""), Some(Ok(Token::Statement)));
}
// Tokenizes a small document with a group, comments, a statement and an empty value.
#[test]
fn tokenize() {
assert_eq!(
get_tokens(
r#"foo { // eol comment
"asd" "bar"
// a comment
#include other
empty ""
}"#
),
Ok(vec![
(Token::Item, "foo"),
(Token::GroupStart, "{"),
(Token::Item, r#""asd""#),
(Token::Item, r#""bar""#),
(Token::Statement, r#"#include"#),
(Token::Item, r#"other"#),
(Token::Item, r#"empty"#),
(Token::Item, r#""""#),
(Token::GroupEnd, "}")
])
)
}
}

194
src/reader.rs Normal file
View file

@ -0,0 +1,194 @@
use super::{Result, Token};
use crate::error::{NoValidTokenError, UnexpectedTokenError};
use logos::{Lexer, Span, SpannedIter};
use std::borrow::Cow;
/// Kinds of item.
///
/// Both kinds carry their text content and the span of the token they came from.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Item<'a> {
/// A statement, the ones starting with #.
Statement { content: Cow<'a, str>, span: Span },
/// A value.
Value { content: Cow<'a, str>, span: Span },
}
impl<'a> Item<'a> {
    /// Returns a copy of the item's span, whichever kind of item it is.
    pub fn span(&self) -> Span {
        match self {
            Item::Statement { span, .. } | Item::Value { span, .. } => span.clone(),
        }
    }

    /// Consumes the item and returns its content, whichever kind of item it is.
    pub fn into_content(self) -> Cow<'a, str> {
        match self {
            Item::Statement { content, .. } | Item::Value { content, .. } => content,
        }
    }
}
/// Reader event.
///
/// Every event carries the span of the source it was read from.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Event<'a> {
/// A group with the given name is starting.
GroupStart { name: Cow<'a, str>, span: Span },
/// A group has ended.
GroupEnd { span: Span },
/// An entry.
Entry {
key: Item<'a>,
value: Item<'a>,
span: Span,
},
/// EOF has been reached.
End { span: Span },
}
impl Event<'_> {
    /// Returns a copy of the event's span, whichever kind of event it is.
    #[allow(dead_code)]
    pub fn span(&self) -> Span {
        match self {
            Event::GroupStart { span, .. }
            | Event::GroupEnd { span, .. }
            | Event::Entry { span, .. }
            | Event::End { span, .. } => span.clone(),
        }
    }
}
/// A VDF token reader.
///
/// Turns the token stream of a source string into [`Event`]s.
pub struct Reader<'a> {
// The complete source text; copied into errors as their source code.
pub(crate) content: &'a str,
// Token iterator over `content`, yielding every token together with its span.
lexer: SpannedIter<'a, Token>,
}
impl<'a> From<&'a str> for Reader<'a> {
    /// Creates a reader over `content`, starting at its first token.
    fn from(content: &'a str) -> Self {
        let lexer = Lexer::new(content).spanned();
        Self { content, lexer }
    }
}
impl<'a> Reader<'a> {
/// Get the next event, this does copies.
#[allow(dead_code)]
pub fn event(&mut self) -> Result<Event> {
let key = match self.lexer.next() {
None => {
return Ok(Event::End {
span: self.lexer.span(),
})
}
Some((Err(_), span)) => {
return Err(NoValidTokenError::new(
&[Token::Item, Token::GroupEnd, Token::Statement],
span.into(),
self.content.into(),
)
.into());
}
Some((Ok(Token::GroupEnd), span)) => return Ok(Event::GroupEnd { span }),
Some((Ok(Token::GroupStart), span)) => {
return Err(UnexpectedTokenError::new(
&[Token::Item, Token::GroupEnd, Token::Statement],
Some(Token::GroupStart),
span.into(),
self.content.into(),
)
.into())
}
Some((Ok(Token::Item), span)) => Item::Value {
content: string(self.lexer.slice()),
span,
},
Some((Ok(Token::Statement), span)) => Item::Statement {
content: string(self.lexer.slice()),
span,
},
};
let value = match self.lexer.next() {
None => {
return Err(UnexpectedTokenError::new(
&[Token::Item, Token::GroupEnd, Token::Statement],
None,
self.lexer.span().into(),
self.content.into(),
)
.into());
}
Some((Err(_), span)) => {
return Err(NoValidTokenError::new(
&[Token::Item, Token::GroupEnd, Token::Statement],
span.into(),
self.content.into(),
)
.into());
}
Some((Ok(Token::GroupEnd), span)) => {
return Err(UnexpectedTokenError::new(
&[Token::Item, Token::GroupStart, Token::Statement],
Some(Token::GroupEnd),
span.into(),
self.content.into(),
)
.into())
}
Some((Ok(Token::GroupStart), span)) => {
return Ok(Event::GroupStart {
name: key.into_content(),
span,
})
}
Some((Ok(Token::Item), span)) => Item::Value {
content: string(self.lexer.slice()),
span,
},
Some((Ok(Token::Statement), span)) => Item::Statement {
content: string(self.lexer.slice()),
span,
},
};
let span = key.span().start..value.span().end;
Ok(Event::Entry { key, value, span })
}
}
/// Resolves the escape sequences `\"` and `\\` in `source`.
///
/// Text without either sequence is returned borrowed and untouched. Otherwise
/// an owned copy is built in which `\"` becomes `"` and `\\` becomes `\`; a
/// backslash before any other character is kept together with that character,
/// and a lone backslash at the very end is dropped.
fn string(source: &str) -> Cow<str> {
    let has_escapes = source.contains(r#"\""#) || source.contains(r#"\\"#);
    if !has_escapes {
        return Cow::Borrowed(source);
    }

    let mut unescaped = String::with_capacity(source.len());
    let mut chars = source.chars();
    while let Some(ch) = chars.next() {
        if ch != '\\' {
            unescaped.push(ch);
            continue;
        }
        // `ch` is a backslash: decide based on the character that follows it.
        match chars.next() {
            Some(escaped) if escaped == '\\' || escaped == '"' => unescaped.push(escaped),
            Some(other) => {
                unescaped.push('\\');
                unescaped.push(other);
            }
            None => break,
        }
    }
    Cow::Owned(unescaped)
}