1
0
Fork 0
mirror of https://codeberg.org/demostf/parser.git synced 2026-06-03 18:24:05 +02:00

better malformed utf8 handling

This commit is contained in:
Robin Appelman 2022-06-13 22:09:28 +02:00
commit c7d83f6be0
9 changed files with 2661 additions and 2598 deletions

129
codegen/Cargo.lock generated
View file

@ -32,15 +32,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
"winapi",
]
[[package]]
name = "atty"
version = "0.2.14"
@ -260,30 +251,12 @@ version = "0.2.112"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125"
[[package]]
name = "log"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
dependencies = [
"cfg-if",
]
[[package]]
name = "main_error"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "155db5e86c6e45ee456bf32fad5a290ee1f7151c2faca27ea27097568da67d1a"
[[package]]
name = "matchers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
dependencies = [
"regex-automata",
]
[[package]]
name = "memchr"
version = "2.4.1"
@ -447,12 +420,6 @@ dependencies = [
"syn",
]
[[package]]
name = "pin-project-lite"
version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e280fbe77cc62c91527259e9442153f4688736748d24660126286329742b4c6c"
[[package]]
name = "prettyplease"
version = "0.1.0"
@ -535,15 +502,6 @@ dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.25"
@ -619,21 +577,6 @@ dependencies = [
"syn",
]
[[package]]
name = "sharded-slab"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31"
dependencies = [
"lazy_static",
]
[[package]]
name = "smallvec"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309"
[[package]]
name = "snap"
version = "1.0.5"
@ -752,8 +695,6 @@ dependencies = [
"serde_repr",
"snap",
"steamid-ng",
"tracing",
"tracing-subscriber",
]
[[package]]
@ -793,15 +734,6 @@ dependencies = [
"syn",
]
[[package]]
name = "thread_local"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd"
dependencies = [
"once_cell",
]
[[package]]
name = "toml"
version = "0.5.8"
@ -811,67 +743,6 @@ dependencies = [
"serde",
]
[[package]]
name = "tracing"
version = "0.1.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105"
dependencies = [
"cfg-if",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-attributes"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4f480b8f81512e825f337ad51e94c1eb5d3bbdf2b363dcd01e2b19a9ffe3f8e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tracing-core"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f4ed65637b8390770814083d20756f87bfa2c21bf2f110babdc5438351746e4"
dependencies = [
"lazy_static",
]
[[package]]
name = "tracing-log"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3"
dependencies = [
"lazy_static",
"log",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d81bfa81424cc98cb034b837c985b7a290f592e5b4322f353f94a0ab0f9f594"
dependencies = [
"ansi_term",
"lazy_static",
"matchers",
"regex",
"sharded-slab",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
"tracing-log",
]
[[package]]
name = "unicode-xid"
version = "0.2.2"

View file

@ -35,7 +35,7 @@ fn should_box_event(name: &str) -> bool {
fn get_type_name(ty: GameEventValueType) -> &'static str {
match ty {
GameEventValueType::String => "String",
GameEventValueType::String => "MaybeUtf8String",
GameEventValueType::Float => "f32",
GameEventValueType::Boolean => "bool",
GameEventValueType::Byte => "u8",
@ -230,6 +230,7 @@ pub fn generate_game_events(demo: Demo) -> TokenStream {
use crate::{ParseError, Result};
use bitbuffer::{BitRead, LittleEndian, BitWrite, BitWriteStream};
use serde::{Deserialize, Serialize};
use crate::demo::data::MaybeUtf8String;
);
let event_definitions = events.iter().map(|event| {

View file

@ -1,3 +1,89 @@
pub mod userinfo;
use bitbuffer::{BitRead, BitReadStream, BitWrite, BitWriteStream, Endianness};
use serde::{Deserialize, Serialize};
use std::fmt::{Debug, Display, Formatter};
pub use userinfo::UserInfo;
/// A string value that may or may not be valid UTF-8.
///
/// `Valid` holds text as a regular `String`; `Invalid` holds the bytes
/// unchanged so they can still be inspected or written back out.
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Eq, PartialEq, Serialize, Deserialize, Clone)]
pub enum MaybeUtf8String {
/// Text that is valid UTF-8.
Valid(String),
/// Raw bytes of a string that could not be decoded as UTF-8.
Invalid(Vec<u8>),
}
impl Default for MaybeUtf8String {
fn default() -> Self {
MaybeUtf8String::Valid(String::new())
}
}
impl AsRef<str> for MaybeUtf8String {
fn as_ref(&self) -> &str {
match self {
MaybeUtf8String::Valid(s) => s.as_str(),
MaybeUtf8String::Invalid(_) => "-- Malformed utf8 --",
}
}
}
impl Debug for MaybeUtf8String {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
MaybeUtf8String::Valid(s) => Debug::fmt(s, f),
MaybeUtf8String::Invalid(b) => f
.debug_struct("MaybeUtf8String::Invalid")
.field("data", b)
.finish(),
}
}
}
impl Display for MaybeUtf8String {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
MaybeUtf8String::Valid(s) => Display::fmt(s, f),
MaybeUtf8String::Invalid(_) => write!(f, "-- Malformed utf8 --"),
}
}
}
impl MaybeUtf8String {
pub fn as_bytes(&self) -> &[u8] {
match self {
MaybeUtf8String::Valid(s) => s.as_bytes(),
MaybeUtf8String::Invalid(b) => b.as_ref(),
}
}
}
impl<'a, E: Endianness> BitRead<'a, E> for MaybeUtf8String {
    /// Reads a string from `stream`, keeping the raw bytes when the data is
    /// not valid UTF-8.
    ///
    /// A successful `String` read yields `Valid`. When the read fails with
    /// `BitError::Utf8Error`, the stream position is moved back by the byte
    /// count carried in the error and that many bytes are re-read into
    /// `Invalid`. Any other error is returned unchanged.
    fn read(stream: &mut BitReadStream<'a, E>) -> bitbuffer::Result<Self> {
        let byte_count = match String::read(stream) {
            Ok(text) => return Ok(Self::Valid(text)),
            Err(bitbuffer::BitError::Utf8Error(_, byte_count)) => byte_count,
            Err(other) => return Err(other),
        };

        // NOTE(review): presumably the failed read has already advanced the
        // stream past the offending bytes, which is why we rewind first —
        // confirm against bitbuffer's `Utf8Error` semantics.
        let rewound = stream.pos() - byte_count * 8;
        stream.set_pos(rewound)?;

        let raw: Vec<u8> = stream.read_sized(byte_count)?;
        Ok(Self::Invalid(raw))
    }
}
impl<E: Endianness> BitWrite<E> for MaybeUtf8String {
    /// Writes the bytes of either variant to `stream`, followed by a single
    /// zero byte.
    fn write(&self, stream: &mut BitWriteStream<E>) -> bitbuffer::Result<()> {
        // Presumably the terminator the string reader looks for — confirm
        // against bitbuffer's string encoding.
        const TRAILING_ZERO: u8 = 0;

        let payload = self.as_bytes();
        stream.write_bytes(payload)?;
        stream.write(&TRAILING_ZERO)
    }
}
/// Converts a [`MaybeUtf8String`] into an owned `String`.
///
/// `Valid` text is moved out without copying; `Invalid` data becomes a fixed
/// placeholder message.
///
/// Implemented as `From` rather than `Into`: the standard library's blanket
/// impl still provides `Into<String> for MaybeUtf8String`, so existing
/// `.into()` callers keep working and `String::from(value)` is available too.
impl From<MaybeUtf8String> for String {
    fn from(value: MaybeUtf8String) -> Self {
        match value {
            MaybeUtf8String::Valid(text) => text,
            MaybeUtf8String::Invalid(_) => "-- Malformed utf8 --".into(),
        }
    }
}

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
pub use super::gameevent_gen::{GameEvent, GameEventType};
use crate::demo::handle_utf8_error;
use crate::demo::data::MaybeUtf8String;
use crate::demo::message::gameevent::GameEventTypeId;
use crate::{GameEventError, Result, Stream};
use bitbuffer::{BitRead, BitWrite, BitWriteStream, LittleEndian};
@ -59,7 +59,7 @@ pub enum GameEventValueType {
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum GameEventValue {
String(String),
String(MaybeUtf8String),
Float(f32),
Long(u32),
Short(u16),
@ -70,9 +70,7 @@ pub enum GameEventValue {
fn read_event_value(stream: &mut Stream, definition: &GameEventEntry) -> Result<GameEventValue> {
Ok(match definition.kind {
GameEventValueType::String => {
GameEventValue::String(stream.read().or_else(handle_utf8_error)?)
}
GameEventValueType::String => GameEventValue::String(stream.read()?),
GameEventValueType::Float => GameEventValue::Float(stream.read()?),
GameEventValueType::Long => GameEventValue::Long(stream.read()?),
GameEventValueType::Short => GameEventValue::Short(stream.read()?),
@ -121,6 +119,12 @@ impl EventValue for String {
}
}
/// Lets `MaybeUtf8String` be used as a game event field value.
impl EventValue for MaybeUtf8String {
/// Reports `GameEventValueType::String` as the value type for this Rust type.
fn value_type() -> GameEventValueType {
GameEventValueType::String
}
}
impl EventValue for f32 {
fn value_type() -> GameEventValueType {
GameEventValueType::Float

View file

@ -1,5 +1,6 @@
use crate::Stream;
/// Messages that consist only of primitives and strings and can be derived
use crate::demo::data::MaybeUtf8String;
use crate::Stream;
use bitbuffer::{BitRead, BitWrite, LittleEndian};
use serde::{Deserialize, Serialize};
@ -49,7 +50,7 @@ pub struct SignOnStateMessage {
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]
#[derive(BitRead, BitWrite, Debug, PartialEq, Serialize, Deserialize, Clone)]
pub struct PrintMessage {
pub value: String,
pub value: MaybeUtf8String,
}
#[cfg_attr(feature = "schema", derive(schemars::JsonSchema))]

View file

@ -1,8 +1,7 @@
use bitbuffer::{BitError, BitRead, BitWrite, BitWriteStream, LittleEndian};
use serde::{Deserialize, Serialize};
use crate::demo::handle_utf8_error;
use crate::demo::data::MaybeUtf8String;
use crate::demo::parser::analyser::UserId;
use crate::{ReadResult, Stream};
@ -228,14 +227,14 @@ pub struct SayText2Message {
pub client: UserId,
pub raw: u8,
pub kind: ChatMessageKind,
pub from: Option<String>,
pub text: String,
pub from: Option<MaybeUtf8String>,
pub text: MaybeUtf8String,
}
impl SayText2Message {
pub fn plain_text(&self) -> String {
// 1: normal, 2: old colors, 3: team, 4: location, 5 achievement, 6 custom
let mut text = self.text.replace(|c| c <= char::from(6), "");
let mut text = self.text.to_string().replace(|c| c <= char::from(6), "");
// 7: 6-char hex
while let Some(pos) = text.chars().enumerate().find_map(|(index, c)| {
if c == char::from(7) {
@ -272,18 +271,18 @@ impl BitRead<'_, LittleEndian> for SayText2Message {
fn read(stream: &mut Stream) -> ReadResult<Self> {
let client = UserId(stream.read()?);
let raw = stream.read()?;
let (kind, from, text): (ChatMessageKind, Option<String>, String) =
let (kind, from, text): (ChatMessageKind, Option<MaybeUtf8String>, MaybeUtf8String) =
if stream.read::<u8>()? == 1 {
stream.set_pos(stream.pos() - 8)?;
let text: String = stream.read().or_else(handle_utf8_error)?;
let text: MaybeUtf8String = stream.read()?;
(ChatMessageKind::ChatAll, None, text)
} else {
stream.set_pos(stream.pos() - 8)?;
let kind = stream.read()?;
let from = stream.read().or_else(handle_utf8_error)?;
let text = stream.read().or_else(handle_utf8_error)?;
let from = stream.read()?;
let text = stream.read()?;
// ends with 2 0 bytes?
if stream.bits_left() >= 16 {
@ -307,7 +306,7 @@ impl BitWrite<LittleEndian> for SayText2Message {
u8::from(self.client).write(stream)?;
self.raw.write(stream)?;
if let Some(from) = self.from.as_deref() {
if let Some(from) = self.from.as_ref().map(|s| s.as_ref()) {
self.kind.write(stream)?;
from.write(stream)?;
self.text.write(stream)?;

View file

@ -1,5 +1,4 @@
use crate::ReadResult;
use bitbuffer::{BitError, BitReadBuffer, BitReadStream, LittleEndian};
use bitbuffer::{BitReadBuffer, BitReadStream, LittleEndian};
pub mod data;
pub mod gameevent_gen;
@ -40,10 +39,3 @@ impl Demo<'static> {
Demo { stream }
}
}
pub(crate) fn handle_utf8_error(error: BitError) -> ReadResult<String> {
match error {
BitError::Utf8Error(_, _) => Ok("-- Malformed utf8 --".into()),
_ => Err(error),
}
}

View file

@ -29,7 +29,11 @@ impl ChatMessage {
pub fn from_message(message: &SayText2Message, tick: u32) -> Self {
ChatMessage {
kind: message.kind,
from: message.from.clone().unwrap_or_default(),
from: message
.from
.as_ref()
.map(|s| s.to_string())
.unwrap_or_default(),
text: message.plain_text(),
tick,
}
@ -328,7 +332,7 @@ impl Death {
assister,
tick,
killer: UserId::from(event.attacker),
weapon: event.weapon.clone(),
weapon: event.weapon.to_string(),
victim: UserId::from(event.user_id),
}
}
@ -416,7 +420,7 @@ impl Analyser {
if let UserMessage::SayText2(text_message) = message {
if text_message.kind == ChatMessageKind::NameChange {
if let Some(from) = text_message.from.clone() {
self.change_name(from, text_message.text.clone());
self.change_name(from.into(), text_message.plain_text());
}
} else {
self.state