1
0
Fork 0
mirror of https://codeberg.org/icewind/bitbuffer.git synced 2026-06-03 16:44:06 +02:00

buffer string read improvements

This commit is contained in:
Robin Appelman 2025-07-14 20:06:45 +02:00
commit 353e8ab25c

View file

@ -154,6 +154,25 @@ where
slice,
}
}
/// Verify that `bits_requested` bits can be read starting at bit `position`.
///
/// # Errors
///
/// - `BitError::IndexOutOfBounds` when `position` lies past the end of the buffer.
/// - `BitError::NotEnoughData` when `position` is inside the buffer but fewer than
///   `bits_requested` bits remain after it.
#[inline(always)]
fn bounds_check(&self, position: usize, bits_requested: usize) -> Result<()> {
    let bit_len = self.bit_len();
    if position > bit_len {
        return Err(BitError::IndexOutOfBounds {
            pos: position,
            size: bit_len,
        });
    }
    // Compare against the remaining bits instead of computing
    // `position + bits_requested`: that sum can wrap around for very large
    // requests, which would let an out-of-range read slip past this check.
    let bits_left = bit_len - position;
    if bits_requested > bits_left {
        return Err(BitError::NotEnoughData {
            requested: bits_requested,
            bits_left,
        });
    }
    Ok(())
}
}
impl<E> BitReadBuffer<'static, E>
@ -372,19 +391,7 @@ where
}
if position + count + USIZE_BIT_SIZE > self.bit_len() {
if position + count > self.bit_len() {
return if position > self.bit_len() {
Err(BitError::IndexOutOfBounds {
pos: position,
size: self.bit_len(),
})
} else {
Err(BitError::NotEnoughData {
requested: count,
bits_left: self.bit_len() - position,
})
};
}
self.bounds_check(position, count)?;
Ok(unsafe { self.read_int_unchecked(position, count, true) })
} else {
Ok(unsafe { self.read_int_unchecked(position, count, false) })
@ -501,9 +508,8 @@ where
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
#[inline]
pub fn read_bytes(&self, position: usize, byte_count: usize) -> Result<Cow<'a, [u8]>> {
let mut output = Vec::new();
let result = self.read_bytes_into(position, byte_count, &mut output)?;
Ok(result.into_cow(output))
self.bounds_check(position, byte_count * 8)?;
Ok(unsafe { self.read_bytes_unchecked(position, byte_count) })
}
/// Read a series of bytes from the buffer, using an existing buffer
@ -549,48 +555,19 @@ where
byte_count: usize,
output: &mut Vec<u8>,
) -> Result<MaybeBorrowed<'a, [u8]>> {
if position + byte_count * 8 > self.bit_len() {
if position > self.bit_len() {
return Err(BitError::IndexOutOfBounds {
pos: position,
size: self.bit_len(),
});
} else {
return Err(BitError::NotEnoughData {
requested: byte_count * 8,
bits_left: self.bit_len() - position,
});
}
}
self.bounds_check(position, byte_count * 8)?;
Ok(unsafe { self.read_bytes_unchecked_into(position, byte_count, output) })
}
#[doc(hidden)]
#[inline]
pub unsafe fn read_bytes_unchecked(&self, position: usize, byte_count: usize) -> Cow<'a, [u8]> {
let mut output = Vec::new();
let result = self.read_bytes_unchecked_into(position, byte_count, &mut output);
result.into_cow(output)
}
#[doc(hidden)]
#[inline]
pub unsafe fn read_bytes_unchecked_into(
#[inline(always)]
unsafe fn read_bytes_unchecked_owned(
&self,
position: usize,
byte_count: usize,
output: &mut Vec<u8>,
) -> MaybeBorrowed<'a, [u8]> {
) {
let shift = position & 7;
if shift == 0 {
let byte_pos = position / 8;
return MaybeBorrowed::Borrowed(&self.slice[byte_pos..byte_pos + byte_count]);
}
output.clear();
output.reserve(byte_count);
let mut byte_left = byte_count;
let mut read_pos = position / 8;
@ -617,6 +594,42 @@ where
pos += 8;
}
}
}
/// Read `byte_count` bytes starting at bit `position` without validating the
/// range against the buffer length first.
///
/// A byte-aligned `position` yields a borrowed sub-slice of the underlying
/// data; any other position copies the bytes into a freshly allocated vector
/// through `read_bytes_unchecked_owned`.
///
/// # Safety
///
/// No bounds check is performed here. The checked wrapper `read_bytes` calls
/// `bounds_check(position, byte_count * 8)` before delegating to this
/// function; other callers are presumably expected to guarantee the same.
#[doc(hidden)]
#[inline]
pub unsafe fn read_bytes_unchecked(&self, position: usize, byte_count: usize) -> Cow<'a, [u8]> {
    if position & 7 == 0 {
        // Aligned: hand out a view into the existing bytes, no copy needed.
        let start = position / 8;
        Cow::Borrowed(&self.slice[start..start + byte_count])
    } else {
        // Unaligned: the bytes have to be re-assembled into an owned buffer.
        let mut unaligned = Vec::with_capacity(byte_count);
        self.read_bytes_unchecked_owned(position, byte_count, &mut unaligned);
        Cow::Owned(unaligned)
    }
}
/// Read `byte_count` bytes starting at bit `position` without validating the
/// range, using `output` as storage when a copy is required.
///
/// Returns `MaybeBorrowed::Borrowed` with a sub-slice of the underlying data
/// when `position` is byte aligned (in which case `output` is left untouched).
/// Otherwise `output` is cleared, filled with the bytes, and
/// `MaybeBorrowed::Owned` is returned to signal that the data lives in `output`.
///
/// # Safety
///
/// No bounds check is performed here. The checked wrapper `read_bytes_into`
/// calls `bounds_check(position, byte_count * 8)` before delegating to this
/// function; other callers are presumably expected to guarantee the same.
#[doc(hidden)]
#[inline]
pub unsafe fn read_bytes_unchecked_into(
    &self,
    position: usize,
    byte_count: usize,
    output: &mut Vec<u8>,
) -> MaybeBorrowed<'a, [u8]> {
    if position & 7 != 0 {
        // Unaligned: reuse the caller's buffer for the re-assembled bytes.
        output.clear();
        output.reserve(byte_count);
        self.read_bytes_unchecked_owned(position, byte_count, output);
        return MaybeBorrowed::Owned;
    }
    // Aligned: borrow directly from the underlying bytes.
    let first_byte = position / 8;
    MaybeBorrowed::Borrowed(&self.slice[first_byte..first_byte + byte_count])
}
@ -660,9 +673,14 @@ where
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
#[inline]
pub fn read_string(&self, position: usize, byte_len: Option<usize>) -> Result<Cow<'a, str>> {
let mut output = String::new();
let result = self.read_string_into(position, byte_len, &mut output)?;
Ok(result.into_cow(output))
let shift = position & 7;
if shift == 0 {
return Ok(Cow::Borrowed(self.read_string_aligned(position, byte_len)?));
}
let output = Vec::with_capacity(64);
let string = self.read_string_unaligned(position, byte_len, output)?;
Ok(Cow::Owned(string))
}
/// Read a series of bytes from the buffer as string.
@ -713,103 +731,114 @@ where
byte_len: Option<usize>,
output: &mut String,
) -> Result<MaybeBorrowed<'a, str>> {
let mut taken_output = take(output).into_bytes();
match byte_len {
Some(byte_len) => {
let bytes = self.read_bytes_into(position, byte_len, &mut taken_output)?;
let result = match bytes {
MaybeBorrowed::Owned => {
*output = String::from_utf8(taken_output)?
.trim_end_matches(char::from(0))
.to_string();
MaybeBorrowed::Owned
}
MaybeBorrowed::Borrowed(bytes) => MaybeBorrowed::Borrowed(
std::str::from_utf8(bytes)
.map_err(|err| BitError::Utf8Error(err, bytes.len()))?
.trim_end_matches(char::from(0)),
),
};
Ok(result)
}
None => {
let bytes = self.read_string_bytes(position, &mut taken_output)?;
let result = match bytes {
MaybeBorrowed::Owned => {
*output = String::from_utf8(taken_output)?;
MaybeBorrowed::Owned
}
MaybeBorrowed::Borrowed(bytes) => MaybeBorrowed::Borrowed(
std::str::from_utf8(bytes)
.map_err(|err| BitError::Utf8Error(err, bytes.len()))?,
),
};
Ok(result)
}
}
}
/// Locate the first zero byte at or after `byte_index` in the underlying slice.
///
/// Returns the absolute index of that byte, or `self.slice.len()` when the
/// search finds none.
#[inline]
fn find_null_byte(&self, byte_index: usize) -> usize {
    match memchr::memchr(0, &self.slice[byte_index..]) {
        // memchr reports an offset relative to the searched tail; make it absolute.
        Some(offset) => offset + byte_index,
        // Per the original author's note: due to padding there are always
        // zero bytes at the end, so this fallback is not expected to be hit.
        None => self.slice.len(),
    }
}
#[inline]
fn read_string_bytes<'output>(
&self,
position: usize,
buffer: &'output mut Vec<u8>,
) -> Result<MaybeBorrowed<'a, [u8]>> {
let shift = position & 7;
if shift == 0 {
let byte_index = position / 8;
Ok(MaybeBorrowed::Borrowed(
&self.slice[byte_index..self.find_null_byte(byte_index)],
))
} else {
buffer.clear();
buffer.reserve(64);
if E::is_le() {
let mut byte_index = position / 8;
loop {
// note: if less than a usize worth of data is left in the buffer, read_usize_bytes
// will automatically pad with null bytes, triggering the loop termination
// thus no separate logic for dealing with the end of the bytes is required
//
// This is safe because the final usize is filled with 0's, thus triggering the exit clause
// before reading any out of bounds
let shifted = unsafe { self.read_shifted_usize(byte_index, shift, true) };
return Ok(MaybeBorrowed::Borrowed(
self.read_string_aligned(position, byte_len)?,
));
}
let has_null = contains_zero_byte_non_top(shifted);
let bytes: [u8; USIZE_SIZE] = shifted.to_le_bytes();
let usable_bytes = &bytes[0..USIZE_SIZE - 1];
let taken_output = take(output).into_bytes();
*output = self.read_string_unaligned(position, byte_len, taken_output)?;
if has_null {
for i in 0..USIZE_SIZE - 1 {
if usable_bytes[i] == 0 {
buffer.extend_from_slice(&usable_bytes[0..i]);
return Ok(MaybeBorrowed::Owned);
}
Ok(MaybeBorrowed::Owned)
}
/// Read a string that starts at a bit position which is not byte aligned.
///
/// `output` serves as scratch space for the re-aligned bytes and becomes the
/// backing storage of the returned `String`, so its allocation is reused.
///
/// - `Some(byte_len)`: exactly `byte_len` bytes are read and trailing NUL
///   bytes are stripped from the result.
/// - `None`: the bytes are gathered by `read_string_bytes`.
///
/// # Errors
///
/// Propagates the error from `bounds_check` or `read_string_bytes`, and the
/// conversion error when the bytes read are not valid UTF-8.
#[inline(always)]
fn read_string_unaligned(
    &self,
    position: usize,
    byte_len: Option<usize>,
    mut output: Vec<u8>,
) -> Result<String> {
    match byte_len {
        Some(byte_len) => {
            self.bounds_check(position, byte_len * 8)?;
            // Prepare the buffer the same way `read_bytes_unchecked_into` does
            // before calling the helper, so bytes left over in a reused buffer
            // can never end up in the result.
            output.clear();
            output.reserve(byte_len);
            // SAFETY: the requested range was validated by `bounds_check` above.
            unsafe { self.read_bytes_unchecked_owned(position, byte_len, &mut output) };
            let mut string = String::from_utf8(output)?;
            // Strip trailing NULs in place rather than copying into a second
            // `String`; the trimmed prefix always ends on a char boundary.
            let trimmed_len = string.trim_end_matches(char::from(0)).len();
            string.truncate(trimmed_len);
            Ok(string)
        }
        None => {
            self.read_string_bytes(position, &mut output)?;
            Ok(String::from_utf8(output)?)
        }
    }
}
/// Read a string that starts on a byte boundary, borrowing directly from the
/// underlying data.
///
/// - `Some(byte_len)`: exactly `byte_len` bytes are read and trailing NUL
///   bytes are stripped from the result.
/// - `None`: the string runs up to (not including) the first NUL byte at or
///   after `position`, or to the end of the underlying slice when no NUL byte
///   is present.
///
/// # Errors
///
/// - The error from `bounds_check` when a fixed-length read does not fit.
/// - `BitError::IndexOutOfBounds` when `position` is past the end of the buffer
///   for a NUL-terminated read.
/// - `BitError::Utf8Error` when the bytes are not valid UTF-8.
#[inline(always)]
fn read_string_aligned(&self, position: usize, byte_len: Option<usize>) -> Result<&'a str> {
    let byte_pos = position / 8;
    match byte_len {
        Some(byte_len) => {
            self.bounds_check(position, byte_len * 8)?;
            // SAFETY: `bounds_check` accepted the range.
            // NOTE(review): this relies on `self.slice` covering at least
            // `bit_len()` bits — confirm against the constructors.
            let bytes = unsafe { self.slice.get_unchecked(byte_pos..byte_pos + byte_len) };
            let text =
                str::from_utf8(bytes).map_err(|err| BitError::Utf8Error(err, byte_len))?;
            Ok(text.trim_end_matches(char::from(0)))
        }
        None => {
            if byte_pos > self.byte_len() {
                return Err(BitError::IndexOutOfBounds {
                    pos: position,
                    size: self.bit_len(),
                });
            }
            // SAFETY: guarded by the check above.
            // NOTE(review): assumes `byte_len()` never exceeds
            // `self.slice.len()` — confirm.
            let slice = unsafe { self.slice.get_unchecked(byte_pos..) };
            // Without a terminator the string runs to the end of the data.
            // Falling back to 0 here would silently turn an unterminated
            // string into an empty one.
            let byte_len = memchr::memchr(0, slice).unwrap_or(slice.len());
            // SAFETY: `byte_len` is either an index reported by memchr inside
            // `slice` or `slice.len()` itself, so the range is in bounds.
            let bytes = unsafe { slice.get_unchecked(..byte_len) };
            let text =
                str::from_utf8(bytes).map_err(|err| BitError::Utf8Error(err, byte_len))?;
            Ok(text)
        }
    }
}
#[inline]
fn read_string_bytes(&self, position: usize, buffer: &mut Vec<u8>) -> Result<()> {
let shift = position & 7;
buffer.clear();
buffer.reserve(64);
if E::is_le() {
let mut byte_index = position / 8;
loop {
// note: if less than a usize worth of data is left in the buffer, read_usize_bytes
// will automatically pad with null bytes, triggering the loop termination
// thus no separate logic for dealing with the end of the bytes is required
//
// This is safe because the final usize is filled with 0's, thus triggering the exit clause
// before reading any out of bounds
let shifted = unsafe { self.read_shifted_usize(byte_index, shift, true) };
let has_null = contains_zero_byte_non_top(shifted);
let bytes: [u8; USIZE_SIZE] = shifted.to_le_bytes();
let usable_bytes = &bytes[0..USIZE_SIZE - 1];
if has_null {
for i in 0..USIZE_SIZE - 1 {
if usable_bytes[i] == 0 {
buffer.extend_from_slice(&usable_bytes[0..i]);
return Ok(());
}
}
buffer.extend_from_slice(&usable_bytes[0..USIZE_SIZE - 1]);
byte_index += USIZE_SIZE - 1;
}
} else {
let mut pos = position;
loop {
let byte = self.read_int::<u8>(pos, 8)?;
pos += 8;
if byte == 0 {
return Ok(MaybeBorrowed::Owned);
} else {
buffer.push(byte);
}
buffer.extend_from_slice(&usable_bytes[0..USIZE_SIZE - 1]);
byte_index += USIZE_SIZE - 1;
}
} else {
let mut pos = position;
loop {
let byte = self.read_int::<u8>(pos, 8)?;
pos += 8;
if byte == 0 {
return Ok(());
} else {
buffer.push(byte);
}
}
}
@ -846,19 +875,7 @@ where
{
let type_bit_size = size_of::<T>() * 8;
if position + type_bit_size + USIZE_BIT_SIZE > self.bit_len() {
if position + type_bit_size > self.bit_len() {
if position > self.bit_len() {
return Err(BitError::IndexOutOfBounds {
pos: position,
size: self.bit_len(),
});
} else {
return Err(BitError::NotEnoughData {
requested: size_of::<T>() * 8,
bits_left: self.bit_len() - position,
});
}
}
self.bounds_check(position, type_bit_size)?;
Ok(unsafe { self.read_float_unchecked(position, true) })
} else {
Ok(unsafe { self.read_float_unchecked(position, false) })
@ -884,12 +901,7 @@ where
}
pub(crate) fn get_sub_buffer(&self, bit_len: usize) -> Result<Self> {
if bit_len > self.bit_len() {
return Err(BitError::NotEnoughData {
requested: bit_len,
bits_left: self.bit_len(),
});
}
self.bounds_check(0, bit_len)?;
Ok(BitReadBuffer {
bytes: self.bytes.clone(),
@ -901,12 +913,7 @@ where
/// Truncate the buffer to a given bit length
pub fn truncate(&mut self, bit_len: usize) -> Result<()> {
if bit_len > self.bit_len() {
return Err(BitError::NotEnoughData {
requested: bit_len,
bits_left: self.bit_len(),
});
}
self.bounds_check(bit_len, 0)?;
self.bit_len = bit_len;
Ok(())