1
0
Fork 0
mirror of https://codeberg.org/icewind/bitbuffer.git synced 2026-06-03 16:44:06 +02:00

add feature to disable checking strings for valid utf8

This commit is contained in:
Robin Appelman 2019-03-08 22:28:06 +01:00
commit bb54b1c917
2 changed files with 68 additions and 48 deletions

View file

@ -17,4 +17,7 @@ bitstream_reader_derive = { version = "0.4", path = "bitstream_reader_derive" }
[dev-dependencies] [dev-dependencies]
maplit = "1.0.1" maplit = "1.0.1"
[features]
unchecked_utf8 = []
[workspace] [workspace]

View file

@ -8,10 +8,10 @@ use std::rc::Rc;
use num_traits::{Float, PrimInt}; use num_traits::{Float, PrimInt};
use crate::{ReadError, Result};
use crate::endianness::Endianness; use crate::endianness::Endianness;
use crate::is_signed::IsSigned; use crate::is_signed::IsSigned;
use crate::unchecked_primitive::{UncheckedPrimitiveFloat, UncheckedPrimitiveInt}; use crate::unchecked_primitive::{UncheckedPrimitiveFloat, UncheckedPrimitiveInt};
use crate::{ReadError, Result};
const USIZE_SIZE: usize = size_of::<usize>(); const USIZE_SIZE: usize = size_of::<usize>();
@ -35,8 +35,8 @@ const USIZE_SIZE: usize = size_of::<usize>();
/// # } /// # }
/// ``` /// ```
pub struct BitBuffer<E> pub struct BitBuffer<E>
where where
E: Endianness, E: Endianness,
{ {
bytes: Rc<Vec<u8>>, bytes: Rc<Vec<u8>>,
bit_len: usize, bit_len: usize,
@ -45,8 +45,8 @@ where
} }
impl<E> BitBuffer<E> impl<E> BitBuffer<E>
where where
E: Endianness, E: Endianness,
{ {
/// Create a new BitBuffer from a byte vector /// Create a new BitBuffer from a byte vector
/// ///
@ -73,8 +73,8 @@ where
} }
impl<E> BitBuffer<E> impl<E> BitBuffer<E>
where where
E: Endianness, E: Endianness,
{ {
/// The available number of bits in the buffer /// The available number of bits in the buffer
pub fn bit_len(&self) -> usize { pub fn bit_len(&self) -> usize {
@ -179,8 +179,8 @@ where
/// [`ReadError::TooManyBits`]: enum.ReadError.html#variant.TooManyBits /// [`ReadError::TooManyBits`]: enum.ReadError.html#variant.TooManyBits
#[inline] #[inline]
pub fn read_int<T>(&self, position: usize, count: usize) -> Result<T> pub fn read_int<T>(&self, position: usize, count: usize) -> Result<T>
where where
T: PrimInt + BitOrAssign + IsSigned + UncheckedPrimitiveInt, T: PrimInt + BitOrAssign + IsSigned + UncheckedPrimitiveInt,
{ {
let type_bit_size = size_of::<T>() * 8; let type_bit_size = size_of::<T>() * 8;
let usize_bit_size = size_of::<usize>() * 8; let usize_bit_size = size_of::<usize>() * 8;
@ -224,8 +224,8 @@ where
#[inline] #[inline]
fn read_fit_usize<T>(&self, position: usize, count: usize) -> T fn read_fit_usize<T>(&self, position: usize, count: usize) -> T
where where
T: PrimInt + BitOrAssign + IsSigned + UncheckedPrimitiveInt, T: PrimInt + BitOrAssign + IsSigned + UncheckedPrimitiveInt,
{ {
let type_bit_size = size_of::<T>() * 8; let type_bit_size = size_of::<T>() * 8;
let raw = self.read_usize(position, count); let raw = self.read_usize(position, count);
@ -238,8 +238,8 @@ where
} }
fn read_no_fit_usize<T>(&self, position: usize, count: usize) -> T fn read_no_fit_usize<T>(&self, position: usize, count: usize) -> T
where where
T: PrimInt + BitOrAssign + IsSigned + UncheckedPrimitiveInt, T: PrimInt + BitOrAssign + IsSigned + UncheckedPrimitiveInt,
{ {
let mut left_to_read = count; let mut left_to_read = count;
let mut acc = T::zero(); let mut acc = T::zero();
@ -265,8 +265,8 @@ where
} }
fn make_signed<T>(&self, value: T, count: usize) -> T fn make_signed<T>(&self, value: T, count: usize) -> T
where where
T: PrimInt + BitOrAssign + IsSigned + UncheckedPrimitiveInt, T: PrimInt + BitOrAssign + IsSigned + UncheckedPrimitiveInt,
{ {
if T::is_signed() { if T::is_signed() {
let sign_bit = value >> (count - 1) & T::one(); let sign_bit = value >> (count - 1) & T::one();
@ -349,6 +349,12 @@ where
/// ///
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string /// You can either read a fixed number of bytes, or a dynamic length null-terminated string
/// ///
/// # Features
///
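/// To skip the overhead of validating that the read bytes are valid UTF-8, you can enable the
/// `unchecked_utf8` feature of the crate, which uses `String::from_utf8_unchecked` instead of
/// `String::from_utf8` to create the string from the read bytes. With the feature enabled no
/// check is performed, so the buffer contents must already be known to be valid UTF-8.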
///
/// # Errors /// # Errors
/// ///
/// - [`ReadError::NotEnoughData`]: not enough bits available in the buffer /// - [`ReadError::NotEnoughData`]: not enough bits available in the buffer
@ -381,42 +387,53 @@ where
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData /// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error /// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
pub fn read_string(&self, position: usize, byte_len: Option<usize>) -> Result<String> { pub fn read_string(&self, position: usize, byte_len: Option<usize>) -> Result<String> {
let bytes = self.read_string_bytes(position, byte_len)?; match byte_len {
let raw_string = String::from_utf8(bytes)?; Some(byte_len) => {
if byte_len.is_some() { let bytes = self.read_bytes(position, byte_len)?;
Ok(raw_string.trim_end_matches(char::from(0)).to_owned()) let raw_string = if cfg!(feature = "unchecked_utf8") {
} else { unsafe {
Ok(raw_string) String::from_utf8_unchecked(bytes)
}
} else {
String::from_utf8(bytes)?
};
Ok(raw_string.trim_end_matches(char::from(0)).to_owned())
},
None => {
let bytes = self.read_string_bytes(position);
if cfg!(feature = "unchecked_utf8") {
unsafe {
Ok(String::from_utf8_unchecked(bytes))
}
} else {
String::from_utf8(bytes).map_err(ReadError::from)
}
}
} }
} }
fn read_string_bytes(&self, position: usize, byte_len: Option<usize>) -> Result<Vec<u8>> { fn read_string_bytes(&self, position: usize) -> Vec<u8> {
match byte_len { let mut acc = Vec::with_capacity(25);
Some(len) => return self.read_bytes(position, len), let mut pos = position;
None => { loop {
let mut acc = Vec::with_capacity(25); let read = min((USIZE_SIZE - 1) * 8, self.bit_len - pos);
let mut pos = position; let raw_bytes = self.read_usize(pos, read);
loop { let bytes: [u8; USIZE_SIZE] = if E::is_le() {
let read = min((USIZE_SIZE - 1) * 8, self.bit_len - pos); raw_bytes.to_le_bytes()
let raw_bytes = self.read_usize(pos, read); } else {
let bytes: [u8; USIZE_SIZE] = if E::is_le() { raw_bytes.to_be_bytes()
raw_bytes.to_le_bytes() };
} else { for i in 0..(USIZE_SIZE - 1) {
raw_bytes.to_be_bytes() // on LE we use the first 7 bytes, on BE the last 7
}; let byte = if E::is_le() { bytes[i] } else { bytes[1 + i] };
for i in 0..(USIZE_SIZE - 1) {
// on LE we use the first 7 bytes, on BE the last 7
let byte = if E::is_le() { bytes[i] } else { bytes[1 + i] };
if byte == 0 { if byte == 0 {
return Ok(acc); return acc;
}
acc.push(byte);
}
pos += read;
} }
acc.push(byte);
} }
}; pos += read;
}
} }
/// Read a sequence of bits from the buffer as float /// Read a sequence of bits from the buffer as float
@ -444,8 +461,8 @@ where
/// ///
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData /// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
pub fn read_float<T>(&self, position: usize) -> Result<T> pub fn read_float<T>(&self, position: usize) -> Result<T>
where where
T: Float + UncheckedPrimitiveFloat, T: Float + UncheckedPrimitiveFloat,
{ {
let type_bit_size = size_of::<T>() * 8; let type_bit_size = size_of::<T>() * 8;
if position + type_bit_size > self.bit_len { if position + type_bit_size > self.bit_len {