mirror of
https://codeberg.org/icewind/bitbuffer.git
synced 2026-06-03 08:34:07 +02:00
Fix stream position when reading malformed UTF-8
This commit is contained in:
parent
4872cd62c4
commit
4d2ea4ee7c
5 changed files with 38 additions and 10 deletions
|
|
@ -1,6 +1,6 @@
|
||||||
[package]
|
[package]
|
||||||
name = "bitbuffer"
|
name = "bitbuffer"
|
||||||
version = "0.8.0"
|
version = "0.8.1"
|
||||||
authors = ["Robin Appelman <robin@icewind.nl>"]
|
authors = ["Robin Appelman <robin@icewind.nl>"]
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
description = "Reading bit sequences from a byte slice"
|
description = "Reading bit sequences from a byte slice"
|
||||||
|
|
|
||||||
|
|
@ -125,7 +125,7 @@ pub enum BitError {
|
||||||
},
|
},
|
||||||
/// The read slice of bytes is not valid UTF-8
|
/// The read slice of bytes is not valid UTF-8
|
||||||
#[error(display = "The read slice of bytes are not valid utf8: {}", _0)]
|
#[error(display = "The read slice of bytes are not valid utf8: {}", _0)]
|
||||||
Utf8Error(#[error(source)] Utf8Error),
|
Utf8Error(Utf8Error, usize),
|
||||||
/// The string that was requested to be written does not fit in the specified fixed length
|
/// The string that was requested to be written does not fit in the specified fixed length
|
||||||
#[error(
|
#[error(
|
||||||
display = "The string that was requested to be written does not fit in the specified fixed length, string is {} bytes long, while a size of {} has been specified",
|
display = "The string that was requested to be written does not fit in the specified fixed length, string is {} bytes long, while a size of {} has been specified",
|
||||||
|
|
@ -142,7 +142,7 @@ pub enum BitError {
|
||||||
|
|
||||||
impl From<FromUtf8Error> for BitError {
|
impl From<FromUtf8Error> for BitError {
|
||||||
fn from(err: FromUtf8Error) -> Self {
|
fn from(err: FromUtf8Error) -> Self {
|
||||||
BitError::from(err.utf8_error())
|
BitError::Utf8Error(err.utf8_error(), err.as_bytes().len())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -587,9 +587,11 @@ where
|
||||||
.trim_end_matches(char::from(0))
|
.trim_end_matches(char::from(0))
|
||||||
.to_string(),
|
.to_string(),
|
||||||
),
|
),
|
||||||
Cow::Borrowed(bytes) => {
|
Cow::Borrowed(bytes) => Cow::Borrowed(
|
||||||
Cow::Borrowed(std::str::from_utf8(bytes)?.trim_end_matches(char::from(0)))
|
std::str::from_utf8(bytes)
|
||||||
}
|
.map_err(|err| BitError::Utf8Error(err, bytes.len()))?
|
||||||
|
.trim_end_matches(char::from(0)),
|
||||||
|
),
|
||||||
};
|
};
|
||||||
Ok(string)
|
Ok(string)
|
||||||
}
|
}
|
||||||
|
|
@ -597,7 +599,10 @@ where
|
||||||
let bytes = self.read_string_bytes(position)?;
|
let bytes = self.read_string_bytes(position)?;
|
||||||
let string = match bytes {
|
let string = match bytes {
|
||||||
Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes)?),
|
Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes)?),
|
||||||
Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes)?),
|
Cow::Borrowed(bytes) => Cow::Borrowed(
|
||||||
|
std::str::from_utf8(bytes)
|
||||||
|
.map_err(|err| BitError::Utf8Error(err, bytes.len()))?,
|
||||||
|
),
|
||||||
};
|
};
|
||||||
Ok(string)
|
Ok(string)
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -306,10 +306,10 @@ where
|
||||||
|
|
||||||
let result = self.buffer.read_string(self.pos, byte_len).map_err(|err| {
|
let result = self.buffer.read_string(self.pos, byte_len).map_err(|err| {
|
||||||
// still advance the stream on malformed utf8
|
// still advance the stream on malformed utf8
|
||||||
if let BitError::Utf8Error(err) = &err {
|
if let BitError::Utf8Error(_, len) = &err {
|
||||||
self.pos += match byte_len {
|
self.pos += match byte_len {
|
||||||
Some(len) => len * 8,
|
Some(len) => len * 8,
|
||||||
None => min((err.valid_up_to() + 1) * 8, max_length),
|
None => min((len + 1) * 8, max_length * 8),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
err
|
err
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ use std::num::NonZeroU16;
|
||||||
|
|
||||||
use maplit::hashmap;
|
use maplit::hashmap;
|
||||||
|
|
||||||
use bitbuffer::{BigEndian, BitRead, BitReadBuffer, BitReadStream, LittleEndian};
|
use bitbuffer::{BigEndian, BitError, BitRead, BitReadBuffer, BitReadStream, LittleEndian};
|
||||||
|
|
||||||
const BYTES: &'static [u8] = &[
|
const BYTES: &'static [u8] = &[
|
||||||
0b1011_0101,
|
0b1011_0101,
|
||||||
|
|
@ -473,3 +473,26 @@ fn test_to_owned_stream() {
|
||||||
assert_eq!(stream.bit_len(), owned.bit_len());
|
assert_eq!(stream.bit_len(), owned.bit_len());
|
||||||
assert_eq!(stream.bits_left(), owned.bits_left());
|
assert_eq!(stream.bits_left(), owned.bits_left());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_invalid_utf8() {
|
||||||
|
let bytes = vec![b'b', b'a', 129, b'c', 0, 0, 0];
|
||||||
|
let buffer = BitReadBuffer::new(&bytes, LittleEndian);
|
||||||
|
let mut stream = BitReadStream::new(buffer.clone());
|
||||||
|
|
||||||
|
assert!(matches!(
|
||||||
|
stream.read_string(None),
|
||||||
|
Err(BitError::Utf8Error(_, 4))
|
||||||
|
));
|
||||||
|
|
||||||
|
assert_eq!(stream.pos(), 5 * 8);
|
||||||
|
|
||||||
|
let mut stream = BitReadStream::new(buffer);
|
||||||
|
|
||||||
|
assert!(matches!(
|
||||||
|
stream.read_string(Some(6)),
|
||||||
|
Err(BitError::Utf8Error(_, 6))
|
||||||
|
));
|
||||||
|
|
||||||
|
assert_eq!(stream.pos(), 6 * 8);
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue