mirror of
https://codeberg.org/icewind/bitbuffer.git
synced 2026-06-03 16:44:06 +02:00
buffer string read improvements
This commit is contained in:
parent
8a96a5dc41
commit
191a5a7cb9
1 changed files with 167 additions and 160 deletions
|
|
@ -154,6 +154,25 @@ where
|
|||
slice,
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn bounds_check(&self, position: usize, bits_requested: usize) -> Result<()> {
|
||||
if position + bits_requested > self.bit_len() {
|
||||
if position > self.bit_len() {
|
||||
Err(BitError::IndexOutOfBounds {
|
||||
pos: position,
|
||||
size: self.bit_len(),
|
||||
})
|
||||
} else {
|
||||
Err(BitError::NotEnoughData {
|
||||
requested: bits_requested,
|
||||
bits_left: self.bit_len() - position,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<E> BitReadBuffer<'static, E>
|
||||
|
|
@ -372,19 +391,7 @@ where
|
|||
}
|
||||
|
||||
if position + count + USIZE_BIT_SIZE > self.bit_len() {
|
||||
if position + count > self.bit_len() {
|
||||
return if position > self.bit_len() {
|
||||
Err(BitError::IndexOutOfBounds {
|
||||
pos: position,
|
||||
size: self.bit_len(),
|
||||
})
|
||||
} else {
|
||||
Err(BitError::NotEnoughData {
|
||||
requested: count,
|
||||
bits_left: self.bit_len() - position,
|
||||
})
|
||||
};
|
||||
}
|
||||
self.bounds_check(position, count)?;
|
||||
Ok(unsafe { self.read_int_unchecked(position, count, true) })
|
||||
} else {
|
||||
Ok(unsafe { self.read_int_unchecked(position, count, false) })
|
||||
|
|
@ -501,9 +508,8 @@ where
|
|||
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
|
||||
#[inline]
|
||||
pub fn read_bytes(&self, position: usize, byte_count: usize) -> Result<Cow<'a, [u8]>> {
|
||||
let mut output = Vec::new();
|
||||
let result = self.read_bytes_into(position, byte_count, &mut output)?;
|
||||
Ok(result.into_cow(output))
|
||||
self.bounds_check(position, byte_count * 8)?;
|
||||
Ok(unsafe { self.read_bytes_unchecked(position, byte_count) })
|
||||
}
|
||||
|
||||
/// Read a series of bytes from the buffer, using an existing buffer
|
||||
|
|
@ -549,48 +555,19 @@ where
|
|||
byte_count: usize,
|
||||
output: &mut Vec<u8>,
|
||||
) -> Result<MaybeBorrowed<'a, [u8]>> {
|
||||
if position + byte_count * 8 > self.bit_len() {
|
||||
if position > self.bit_len() {
|
||||
return Err(BitError::IndexOutOfBounds {
|
||||
pos: position,
|
||||
size: self.bit_len(),
|
||||
});
|
||||
} else {
|
||||
return Err(BitError::NotEnoughData {
|
||||
requested: byte_count * 8,
|
||||
bits_left: self.bit_len() - position,
|
||||
});
|
||||
}
|
||||
}
|
||||
self.bounds_check(position, byte_count * 8)?;
|
||||
|
||||
Ok(unsafe { self.read_bytes_unchecked_into(position, byte_count, output) })
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
#[inline]
|
||||
pub unsafe fn read_bytes_unchecked(&self, position: usize, byte_count: usize) -> Cow<'a, [u8]> {
|
||||
let mut output = Vec::new();
|
||||
let result = self.read_bytes_unchecked_into(position, byte_count, &mut output);
|
||||
result.into_cow(output)
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
#[inline]
|
||||
pub unsafe fn read_bytes_unchecked_into(
|
||||
#[inline(always)]
|
||||
unsafe fn read_bytes_unchecked_owned(
|
||||
&self,
|
||||
position: usize,
|
||||
byte_count: usize,
|
||||
output: &mut Vec<u8>,
|
||||
) -> MaybeBorrowed<'a, [u8]> {
|
||||
) {
|
||||
let shift = position & 7;
|
||||
|
||||
if shift == 0 {
|
||||
let byte_pos = position / 8;
|
||||
return MaybeBorrowed::Borrowed(&self.slice[byte_pos..byte_pos + byte_count]);
|
||||
}
|
||||
|
||||
output.clear();
|
||||
output.reserve(byte_count);
|
||||
let mut byte_left = byte_count;
|
||||
let mut read_pos = position / 8;
|
||||
|
||||
|
|
@ -617,6 +594,42 @@ where
|
|||
pos += 8;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
#[inline]
|
||||
pub unsafe fn read_bytes_unchecked(&self, position: usize, byte_count: usize) -> Cow<'a, [u8]> {
|
||||
let shift = position & 7;
|
||||
|
||||
if shift == 0 {
|
||||
let byte_pos = position / 8;
|
||||
return Cow::Borrowed(&self.slice[byte_pos..byte_pos + byte_count]);
|
||||
}
|
||||
|
||||
let mut output = Vec::with_capacity(byte_count);
|
||||
self.read_bytes_unchecked_owned(position, byte_count, &mut output);
|
||||
|
||||
Cow::Owned(output)
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
#[inline]
|
||||
pub unsafe fn read_bytes_unchecked_into(
|
||||
&self,
|
||||
position: usize,
|
||||
byte_count: usize,
|
||||
output: &mut Vec<u8>,
|
||||
) -> MaybeBorrowed<'a, [u8]> {
|
||||
let shift = position & 7;
|
||||
|
||||
if shift == 0 {
|
||||
let byte_pos = position / 8;
|
||||
return MaybeBorrowed::Borrowed(&self.slice[byte_pos..byte_pos + byte_count]);
|
||||
}
|
||||
|
||||
output.clear();
|
||||
output.reserve(byte_count);
|
||||
self.read_bytes_unchecked_owned(position, byte_count, output);
|
||||
|
||||
MaybeBorrowed::Owned
|
||||
}
|
||||
|
|
@ -660,9 +673,14 @@ where
|
|||
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
|
||||
#[inline]
|
||||
pub fn read_string(&self, position: usize, byte_len: Option<usize>) -> Result<Cow<'a, str>> {
|
||||
let mut output = String::new();
|
||||
let result = self.read_string_into(position, byte_len, &mut output)?;
|
||||
Ok(result.into_cow(output))
|
||||
let shift = position & 7;
|
||||
if shift == 0 {
|
||||
return Ok(Cow::Borrowed(self.read_string_aligned(position, byte_len)?));
|
||||
}
|
||||
|
||||
let output = Vec::with_capacity(64);
|
||||
let string = self.read_string_unaligned(position, byte_len, output)?;
|
||||
Ok(Cow::Owned(string))
|
||||
}
|
||||
|
||||
/// Read a series of bytes from the buffer as string.
|
||||
|
|
@ -713,103 +731,114 @@ where
|
|||
byte_len: Option<usize>,
|
||||
output: &mut String,
|
||||
) -> Result<MaybeBorrowed<'a, str>> {
|
||||
let mut taken_output = take(output).into_bytes();
|
||||
match byte_len {
|
||||
Some(byte_len) => {
|
||||
let bytes = self.read_bytes_into(position, byte_len, &mut taken_output)?;
|
||||
|
||||
let result = match bytes {
|
||||
MaybeBorrowed::Owned => {
|
||||
*output = String::from_utf8(taken_output)?
|
||||
.trim_end_matches(char::from(0))
|
||||
.to_string();
|
||||
MaybeBorrowed::Owned
|
||||
}
|
||||
MaybeBorrowed::Borrowed(bytes) => MaybeBorrowed::Borrowed(
|
||||
std::str::from_utf8(bytes)
|
||||
.map_err(|err| BitError::Utf8Error(err, bytes.len()))?
|
||||
.trim_end_matches(char::from(0)),
|
||||
),
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
None => {
|
||||
let bytes = self.read_string_bytes(position, &mut taken_output)?;
|
||||
let result = match bytes {
|
||||
MaybeBorrowed::Owned => {
|
||||
*output = String::from_utf8(taken_output)?;
|
||||
MaybeBorrowed::Owned
|
||||
}
|
||||
MaybeBorrowed::Borrowed(bytes) => MaybeBorrowed::Borrowed(
|
||||
std::str::from_utf8(bytes)
|
||||
.map_err(|err| BitError::Utf8Error(err, bytes.len()))?,
|
||||
),
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn find_null_byte(&self, byte_index: usize) -> usize {
|
||||
memchr::memchr(0, &self.slice[byte_index..])
|
||||
.map(|index| index + byte_index)
|
||||
.unwrap_or(self.slice.len()) // due to padding we always have 0 bytes at the end
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn read_string_bytes<'output>(
|
||||
&self,
|
||||
position: usize,
|
||||
buffer: &'output mut Vec<u8>,
|
||||
) -> Result<MaybeBorrowed<'a, [u8]>> {
|
||||
let shift = position & 7;
|
||||
if shift == 0 {
|
||||
let byte_index = position / 8;
|
||||
Ok(MaybeBorrowed::Borrowed(
|
||||
&self.slice[byte_index..self.find_null_byte(byte_index)],
|
||||
))
|
||||
} else {
|
||||
buffer.clear();
|
||||
buffer.reserve(64);
|
||||
if E::is_le() {
|
||||
let mut byte_index = position / 8;
|
||||
loop {
|
||||
// note: if less than a usize worth of data is left in the buffer, read_usize_bytes
|
||||
// will automatically pad with null bytes, triggering the loop termination
|
||||
// thus no separate logic for dealing with the end of the bytes is required
|
||||
//
|
||||
// This is safe because the final usize is filled with 0's, thus triggering the exit clause
|
||||
// before reading any out of bounds
|
||||
let shifted = unsafe { self.read_shifted_usize(byte_index, shift, true) };
|
||||
return Ok(MaybeBorrowed::Borrowed(
|
||||
self.read_string_aligned(position, byte_len)?,
|
||||
));
|
||||
}
|
||||
|
||||
let has_null = contains_zero_byte_non_top(shifted);
|
||||
let bytes: [u8; USIZE_SIZE] = shifted.to_le_bytes();
|
||||
let usable_bytes = &bytes[0..USIZE_SIZE - 1];
|
||||
let taken_output = take(output).into_bytes();
|
||||
*output = self.read_string_unaligned(position, byte_len, taken_output)?;
|
||||
|
||||
if has_null {
|
||||
for i in 0..USIZE_SIZE - 1 {
|
||||
if usable_bytes[i] == 0 {
|
||||
buffer.extend_from_slice(&usable_bytes[0..i]);
|
||||
return Ok(MaybeBorrowed::Owned);
|
||||
}
|
||||
Ok(MaybeBorrowed::Owned)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn read_string_unaligned(
|
||||
&self,
|
||||
position: usize,
|
||||
byte_len: Option<usize>,
|
||||
mut output: Vec<u8>,
|
||||
) -> Result<String> {
|
||||
Ok(match byte_len {
|
||||
Some(byte_len) => {
|
||||
self.bounds_check(position, byte_len * 8)?;
|
||||
unsafe { self.read_bytes_unchecked_owned(position, byte_len, &mut output) };
|
||||
|
||||
String::from_utf8(output)?
|
||||
.trim_end_matches(char::from(0))
|
||||
.to_string()
|
||||
}
|
||||
None => {
|
||||
self.read_string_bytes(position, &mut output)?;
|
||||
String::from_utf8(output)?
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn read_string_aligned(&self, position: usize, byte_len: Option<usize>) -> Result<&'a str> {
|
||||
let byte_pos = position / 8;
|
||||
let str = match byte_len {
|
||||
Some(byte_len) => {
|
||||
self.bounds_check(position, byte_len * 8)?;
|
||||
let bytes = unsafe { self.slice.get_unchecked(byte_pos..byte_pos + byte_len) };
|
||||
str::from_utf8(bytes)
|
||||
.map_err(|err| BitError::Utf8Error(err, byte_len))?
|
||||
.trim_end_matches(char::from(0))
|
||||
}
|
||||
None => {
|
||||
if byte_pos > self.byte_len() {
|
||||
return Err(BitError::IndexOutOfBounds {
|
||||
pos: position,
|
||||
size: self.bit_len(),
|
||||
});
|
||||
}
|
||||
|
||||
let slice = unsafe { self.slice.get_unchecked(byte_pos..) };
|
||||
let byte_len = memchr::memchr(0, slice).unwrap_or_default();
|
||||
|
||||
let bytes = unsafe { self.slice.get_unchecked(byte_pos..byte_pos + byte_len) };
|
||||
str::from_utf8(bytes).map_err(|err| BitError::Utf8Error(err, byte_len))?
|
||||
}
|
||||
};
|
||||
|
||||
Ok(str)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn read_string_bytes(&self, position: usize, buffer: &mut Vec<u8>) -> Result<()> {
|
||||
let shift = position & 7;
|
||||
buffer.clear();
|
||||
buffer.reserve(64);
|
||||
if E::is_le() {
|
||||
let mut byte_index = position / 8;
|
||||
loop {
|
||||
// note: if less than a usize worth of data is left in the buffer, read_usize_bytes
|
||||
// will automatically pad with null bytes, triggering the loop termination
|
||||
// thus no separate logic for dealing with the end of the bytes is required
|
||||
//
|
||||
// This is safe because the final usize is filled with 0's, thus triggering the exit clause
|
||||
// before reading any out of bounds
|
||||
let shifted = unsafe { self.read_shifted_usize(byte_index, shift, true) };
|
||||
|
||||
let has_null = contains_zero_byte_non_top(shifted);
|
||||
let bytes: [u8; USIZE_SIZE] = shifted.to_le_bytes();
|
||||
let usable_bytes = &bytes[0..USIZE_SIZE - 1];
|
||||
|
||||
if has_null {
|
||||
for i in 0..USIZE_SIZE - 1 {
|
||||
if usable_bytes[i] == 0 {
|
||||
buffer.extend_from_slice(&usable_bytes[0..i]);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
buffer.extend_from_slice(&usable_bytes[0..USIZE_SIZE - 1]);
|
||||
|
||||
byte_index += USIZE_SIZE - 1;
|
||||
}
|
||||
} else {
|
||||
let mut pos = position;
|
||||
loop {
|
||||
let byte = self.read_int::<u8>(pos, 8)?;
|
||||
pos += 8;
|
||||
if byte == 0 {
|
||||
return Ok(MaybeBorrowed::Owned);
|
||||
} else {
|
||||
buffer.push(byte);
|
||||
}
|
||||
|
||||
buffer.extend_from_slice(&usable_bytes[0..USIZE_SIZE - 1]);
|
||||
|
||||
byte_index += USIZE_SIZE - 1;
|
||||
}
|
||||
} else {
|
||||
let mut pos = position;
|
||||
loop {
|
||||
let byte = self.read_int::<u8>(pos, 8)?;
|
||||
pos += 8;
|
||||
if byte == 0 {
|
||||
return Ok(());
|
||||
} else {
|
||||
buffer.push(byte);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -846,19 +875,7 @@ where
|
|||
{
|
||||
let type_bit_size = size_of::<T>() * 8;
|
||||
if position + type_bit_size + USIZE_BIT_SIZE > self.bit_len() {
|
||||
if position + type_bit_size > self.bit_len() {
|
||||
if position > self.bit_len() {
|
||||
return Err(BitError::IndexOutOfBounds {
|
||||
pos: position,
|
||||
size: self.bit_len(),
|
||||
});
|
||||
} else {
|
||||
return Err(BitError::NotEnoughData {
|
||||
requested: size_of::<T>() * 8,
|
||||
bits_left: self.bit_len() - position,
|
||||
});
|
||||
}
|
||||
}
|
||||
self.bounds_check(position, type_bit_size)?;
|
||||
Ok(unsafe { self.read_float_unchecked(position, true) })
|
||||
} else {
|
||||
Ok(unsafe { self.read_float_unchecked(position, false) })
|
||||
|
|
@ -884,12 +901,7 @@ where
|
|||
}
|
||||
|
||||
pub(crate) fn get_sub_buffer(&self, bit_len: usize) -> Result<Self> {
|
||||
if bit_len > self.bit_len() {
|
||||
return Err(BitError::NotEnoughData {
|
||||
requested: bit_len,
|
||||
bits_left: self.bit_len(),
|
||||
});
|
||||
}
|
||||
self.bounds_check(0, bit_len)?;
|
||||
|
||||
Ok(BitReadBuffer {
|
||||
bytes: self.bytes.clone(),
|
||||
|
|
@ -901,12 +913,7 @@ where
|
|||
|
||||
/// Truncate the buffer to a given bit length
|
||||
pub fn truncate(&mut self, bit_len: usize) -> Result<()> {
|
||||
if bit_len > self.bit_len() {
|
||||
return Err(BitError::NotEnoughData {
|
||||
requested: bit_len,
|
||||
bits_left: self.bit_len(),
|
||||
});
|
||||
}
|
||||
self.bounds_check(bit_len, 0)?;
|
||||
|
||||
self.bit_len = bit_len;
|
||||
Ok(())
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue