mirror of
https://codeberg.org/icewind/bitbuffer.git
synced 2026-06-03 16:44:06 +02:00
buffer string read improvements
This commit is contained in:
parent
774d820aaf
commit
353e8ab25c
1 changed file with 167 additions and 160 deletions
|
|
@ -154,6 +154,25 @@ where
|
||||||
slice,
|
slice,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn bounds_check(&self, position: usize, bits_requested: usize) -> Result<()> {
|
||||||
|
if position + bits_requested > self.bit_len() {
|
||||||
|
if position > self.bit_len() {
|
||||||
|
Err(BitError::IndexOutOfBounds {
|
||||||
|
pos: position,
|
||||||
|
size: self.bit_len(),
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
Err(BitError::NotEnoughData {
|
||||||
|
requested: bits_requested,
|
||||||
|
bits_left: self.bit_len() - position,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<E> BitReadBuffer<'static, E>
|
impl<E> BitReadBuffer<'static, E>
|
||||||
|
|
@ -372,19 +391,7 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
if position + count + USIZE_BIT_SIZE > self.bit_len() {
|
if position + count + USIZE_BIT_SIZE > self.bit_len() {
|
||||||
if position + count > self.bit_len() {
|
self.bounds_check(position, count)?;
|
||||||
return if position > self.bit_len() {
|
|
||||||
Err(BitError::IndexOutOfBounds {
|
|
||||||
pos: position,
|
|
||||||
size: self.bit_len(),
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
Err(BitError::NotEnoughData {
|
|
||||||
requested: count,
|
|
||||||
bits_left: self.bit_len() - position,
|
|
||||||
})
|
|
||||||
};
|
|
||||||
}
|
|
||||||
Ok(unsafe { self.read_int_unchecked(position, count, true) })
|
Ok(unsafe { self.read_int_unchecked(position, count, true) })
|
||||||
} else {
|
} else {
|
||||||
Ok(unsafe { self.read_int_unchecked(position, count, false) })
|
Ok(unsafe { self.read_int_unchecked(position, count, false) })
|
||||||
|
|
@ -501,9 +508,8 @@ where
|
||||||
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
|
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn read_bytes(&self, position: usize, byte_count: usize) -> Result<Cow<'a, [u8]>> {
|
pub fn read_bytes(&self, position: usize, byte_count: usize) -> Result<Cow<'a, [u8]>> {
|
||||||
let mut output = Vec::new();
|
self.bounds_check(position, byte_count * 8)?;
|
||||||
let result = self.read_bytes_into(position, byte_count, &mut output)?;
|
Ok(unsafe { self.read_bytes_unchecked(position, byte_count) })
|
||||||
Ok(result.into_cow(output))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read a series of bytes from the buffer, using an existing buffer
|
/// Read a series of bytes from the buffer, using an existing buffer
|
||||||
|
|
@ -549,48 +555,19 @@ where
|
||||||
byte_count: usize,
|
byte_count: usize,
|
||||||
output: &mut Vec<u8>,
|
output: &mut Vec<u8>,
|
||||||
) -> Result<MaybeBorrowed<'a, [u8]>> {
|
) -> Result<MaybeBorrowed<'a, [u8]>> {
|
||||||
if position + byte_count * 8 > self.bit_len() {
|
self.bounds_check(position, byte_count * 8)?;
|
||||||
if position > self.bit_len() {
|
|
||||||
return Err(BitError::IndexOutOfBounds {
|
|
||||||
pos: position,
|
|
||||||
size: self.bit_len(),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
return Err(BitError::NotEnoughData {
|
|
||||||
requested: byte_count * 8,
|
|
||||||
bits_left: self.bit_len() - position,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(unsafe { self.read_bytes_unchecked_into(position, byte_count, output) })
|
Ok(unsafe { self.read_bytes_unchecked_into(position, byte_count, output) })
|
||||||
}
|
}
|
||||||
|
|
||||||
#[doc(hidden)]
|
#[inline(always)]
|
||||||
#[inline]
|
unsafe fn read_bytes_unchecked_owned(
|
||||||
pub unsafe fn read_bytes_unchecked(&self, position: usize, byte_count: usize) -> Cow<'a, [u8]> {
|
|
||||||
let mut output = Vec::new();
|
|
||||||
let result = self.read_bytes_unchecked_into(position, byte_count, &mut output);
|
|
||||||
result.into_cow(output)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[doc(hidden)]
|
|
||||||
#[inline]
|
|
||||||
pub unsafe fn read_bytes_unchecked_into(
|
|
||||||
&self,
|
&self,
|
||||||
position: usize,
|
position: usize,
|
||||||
byte_count: usize,
|
byte_count: usize,
|
||||||
output: &mut Vec<u8>,
|
output: &mut Vec<u8>,
|
||||||
) -> MaybeBorrowed<'a, [u8]> {
|
) {
|
||||||
let shift = position & 7;
|
let shift = position & 7;
|
||||||
|
|
||||||
if shift == 0 {
|
|
||||||
let byte_pos = position / 8;
|
|
||||||
return MaybeBorrowed::Borrowed(&self.slice[byte_pos..byte_pos + byte_count]);
|
|
||||||
}
|
|
||||||
|
|
||||||
output.clear();
|
|
||||||
output.reserve(byte_count);
|
|
||||||
let mut byte_left = byte_count;
|
let mut byte_left = byte_count;
|
||||||
let mut read_pos = position / 8;
|
let mut read_pos = position / 8;
|
||||||
|
|
||||||
|
|
@ -617,6 +594,42 @@ where
|
||||||
pos += 8;
|
pos += 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[doc(hidden)]
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn read_bytes_unchecked(&self, position: usize, byte_count: usize) -> Cow<'a, [u8]> {
|
||||||
|
let shift = position & 7;
|
||||||
|
|
||||||
|
if shift == 0 {
|
||||||
|
let byte_pos = position / 8;
|
||||||
|
return Cow::Borrowed(&self.slice[byte_pos..byte_pos + byte_count]);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut output = Vec::with_capacity(byte_count);
|
||||||
|
self.read_bytes_unchecked_owned(position, byte_count, &mut output);
|
||||||
|
|
||||||
|
Cow::Owned(output)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[doc(hidden)]
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn read_bytes_unchecked_into(
|
||||||
|
&self,
|
||||||
|
position: usize,
|
||||||
|
byte_count: usize,
|
||||||
|
output: &mut Vec<u8>,
|
||||||
|
) -> MaybeBorrowed<'a, [u8]> {
|
||||||
|
let shift = position & 7;
|
||||||
|
|
||||||
|
if shift == 0 {
|
||||||
|
let byte_pos = position / 8;
|
||||||
|
return MaybeBorrowed::Borrowed(&self.slice[byte_pos..byte_pos + byte_count]);
|
||||||
|
}
|
||||||
|
|
||||||
|
output.clear();
|
||||||
|
output.reserve(byte_count);
|
||||||
|
self.read_bytes_unchecked_owned(position, byte_count, output);
|
||||||
|
|
||||||
MaybeBorrowed::Owned
|
MaybeBorrowed::Owned
|
||||||
}
|
}
|
||||||
|
|
@ -660,9 +673,14 @@ where
|
||||||
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
|
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn read_string(&self, position: usize, byte_len: Option<usize>) -> Result<Cow<'a, str>> {
|
pub fn read_string(&self, position: usize, byte_len: Option<usize>) -> Result<Cow<'a, str>> {
|
||||||
let mut output = String::new();
|
let shift = position & 7;
|
||||||
let result = self.read_string_into(position, byte_len, &mut output)?;
|
if shift == 0 {
|
||||||
Ok(result.into_cow(output))
|
return Ok(Cow::Borrowed(self.read_string_aligned(position, byte_len)?));
|
||||||
|
}
|
||||||
|
|
||||||
|
let output = Vec::with_capacity(64);
|
||||||
|
let string = self.read_string_unaligned(position, byte_len, output)?;
|
||||||
|
Ok(Cow::Owned(string))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read a series of bytes from the buffer as string.
|
/// Read a series of bytes from the buffer as string.
|
||||||
|
|
@ -713,103 +731,114 @@ where
|
||||||
byte_len: Option<usize>,
|
byte_len: Option<usize>,
|
||||||
output: &mut String,
|
output: &mut String,
|
||||||
) -> Result<MaybeBorrowed<'a, str>> {
|
) -> Result<MaybeBorrowed<'a, str>> {
|
||||||
let mut taken_output = take(output).into_bytes();
|
|
||||||
match byte_len {
|
|
||||||
Some(byte_len) => {
|
|
||||||
let bytes = self.read_bytes_into(position, byte_len, &mut taken_output)?;
|
|
||||||
|
|
||||||
let result = match bytes {
|
|
||||||
MaybeBorrowed::Owned => {
|
|
||||||
*output = String::from_utf8(taken_output)?
|
|
||||||
.trim_end_matches(char::from(0))
|
|
||||||
.to_string();
|
|
||||||
MaybeBorrowed::Owned
|
|
||||||
}
|
|
||||||
MaybeBorrowed::Borrowed(bytes) => MaybeBorrowed::Borrowed(
|
|
||||||
std::str::from_utf8(bytes)
|
|
||||||
.map_err(|err| BitError::Utf8Error(err, bytes.len()))?
|
|
||||||
.trim_end_matches(char::from(0)),
|
|
||||||
),
|
|
||||||
};
|
|
||||||
Ok(result)
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
let bytes = self.read_string_bytes(position, &mut taken_output)?;
|
|
||||||
let result = match bytes {
|
|
||||||
MaybeBorrowed::Owned => {
|
|
||||||
*output = String::from_utf8(taken_output)?;
|
|
||||||
MaybeBorrowed::Owned
|
|
||||||
}
|
|
||||||
MaybeBorrowed::Borrowed(bytes) => MaybeBorrowed::Borrowed(
|
|
||||||
std::str::from_utf8(bytes)
|
|
||||||
.map_err(|err| BitError::Utf8Error(err, bytes.len()))?,
|
|
||||||
),
|
|
||||||
};
|
|
||||||
Ok(result)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn find_null_byte(&self, byte_index: usize) -> usize {
|
|
||||||
memchr::memchr(0, &self.slice[byte_index..])
|
|
||||||
.map(|index| index + byte_index)
|
|
||||||
.unwrap_or(self.slice.len()) // due to padding we always have 0 bytes at the end
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn read_string_bytes<'output>(
|
|
||||||
&self,
|
|
||||||
position: usize,
|
|
||||||
buffer: &'output mut Vec<u8>,
|
|
||||||
) -> Result<MaybeBorrowed<'a, [u8]>> {
|
|
||||||
let shift = position & 7;
|
let shift = position & 7;
|
||||||
if shift == 0 {
|
if shift == 0 {
|
||||||
let byte_index = position / 8;
|
return Ok(MaybeBorrowed::Borrowed(
|
||||||
Ok(MaybeBorrowed::Borrowed(
|
self.read_string_aligned(position, byte_len)?,
|
||||||
&self.slice[byte_index..self.find_null_byte(byte_index)],
|
));
|
||||||
))
|
}
|
||||||
} else {
|
|
||||||
buffer.clear();
|
|
||||||
buffer.reserve(64);
|
|
||||||
if E::is_le() {
|
|
||||||
let mut byte_index = position / 8;
|
|
||||||
loop {
|
|
||||||
// note: if less then a usize worth of data is left in the buffer, read_usize_bytes
|
|
||||||
// will automatically pad with null bytes, triggering the loop termination
|
|
||||||
// thus no separate logic for dealing with the end of the bytes is required
|
|
||||||
//
|
|
||||||
// This is safe because the final usize is filled with 0's, thus triggering the exit clause
|
|
||||||
// before reading any out of bounds
|
|
||||||
let shifted = unsafe { self.read_shifted_usize(byte_index, shift, true) };
|
|
||||||
|
|
||||||
let has_null = contains_zero_byte_non_top(shifted);
|
let taken_output = take(output).into_bytes();
|
||||||
let bytes: [u8; USIZE_SIZE] = shifted.to_le_bytes();
|
*output = self.read_string_unaligned(position, byte_len, taken_output)?;
|
||||||
let usable_bytes = &bytes[0..USIZE_SIZE - 1];
|
|
||||||
|
|
||||||
if has_null {
|
Ok(MaybeBorrowed::Owned)
|
||||||
for i in 0..USIZE_SIZE - 1 {
|
}
|
||||||
if usable_bytes[i] == 0 {
|
|
||||||
buffer.extend_from_slice(&usable_bytes[0..i]);
|
#[inline(always)]
|
||||||
return Ok(MaybeBorrowed::Owned);
|
fn read_string_unaligned(
|
||||||
}
|
&self,
|
||||||
|
position: usize,
|
||||||
|
byte_len: Option<usize>,
|
||||||
|
mut output: Vec<u8>,
|
||||||
|
) -> Result<String> {
|
||||||
|
Ok(match byte_len {
|
||||||
|
Some(byte_len) => {
|
||||||
|
self.bounds_check(position, byte_len * 8)?;
|
||||||
|
unsafe { self.read_bytes_unchecked_owned(position, byte_len, &mut output) };
|
||||||
|
|
||||||
|
String::from_utf8(output)?
|
||||||
|
.trim_end_matches(char::from(0))
|
||||||
|
.to_string()
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
self.read_string_bytes(position, &mut output)?;
|
||||||
|
String::from_utf8(output)?
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
|
fn read_string_aligned(&self, position: usize, byte_len: Option<usize>) -> Result<&'a str> {
|
||||||
|
let byte_pos = position / 8;
|
||||||
|
let str = match byte_len {
|
||||||
|
Some(byte_len) => {
|
||||||
|
self.bounds_check(position, byte_len * 8)?;
|
||||||
|
let bytes = unsafe { self.slice.get_unchecked(byte_pos..byte_pos + byte_len) };
|
||||||
|
str::from_utf8(bytes)
|
||||||
|
.map_err(|err| BitError::Utf8Error(err, byte_len))?
|
||||||
|
.trim_end_matches(char::from(0))
|
||||||
|
}
|
||||||
|
None => {
|
||||||
|
if byte_pos > self.byte_len() {
|
||||||
|
return Err(BitError::IndexOutOfBounds {
|
||||||
|
pos: position,
|
||||||
|
size: self.bit_len(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
let slice = unsafe { self.slice.get_unchecked(byte_pos..) };
|
||||||
|
let byte_len = memchr::memchr(0, slice).unwrap_or_default();
|
||||||
|
|
||||||
|
let bytes = unsafe { self.slice.get_unchecked(byte_pos..byte_pos + byte_len) };
|
||||||
|
str::from_utf8(bytes).map_err(|err| BitError::Utf8Error(err, byte_len))?
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(str)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn read_string_bytes(&self, position: usize, buffer: &mut Vec<u8>) -> Result<()> {
|
||||||
|
let shift = position & 7;
|
||||||
|
buffer.clear();
|
||||||
|
buffer.reserve(64);
|
||||||
|
if E::is_le() {
|
||||||
|
let mut byte_index = position / 8;
|
||||||
|
loop {
|
||||||
|
// note: if less than a usize worth of data is left in the buffer, read_usize_bytes
|
||||||
|
// will automatically pad with null bytes, triggering the loop termination
|
||||||
|
// thus no separate logic for dealing with the end of the bytes is required
|
||||||
|
//
|
||||||
|
// This is safe because the final usize is filled with 0's, thus triggering the exit clause
|
||||||
|
// before reading any out of bounds
|
||||||
|
let shifted = unsafe { self.read_shifted_usize(byte_index, shift, true) };
|
||||||
|
|
||||||
|
let has_null = contains_zero_byte_non_top(shifted);
|
||||||
|
let bytes: [u8; USIZE_SIZE] = shifted.to_le_bytes();
|
||||||
|
let usable_bytes = &bytes[0..USIZE_SIZE - 1];
|
||||||
|
|
||||||
|
if has_null {
|
||||||
|
for i in 0..USIZE_SIZE - 1 {
|
||||||
|
if usable_bytes[i] == 0 {
|
||||||
|
buffer.extend_from_slice(&usable_bytes[0..i]);
|
||||||
|
return Ok(());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
buffer.extend_from_slice(&usable_bytes[0..USIZE_SIZE - 1]);
|
|
||||||
|
|
||||||
byte_index += USIZE_SIZE - 1;
|
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
let mut pos = position;
|
buffer.extend_from_slice(&usable_bytes[0..USIZE_SIZE - 1]);
|
||||||
loop {
|
|
||||||
let byte = self.read_int::<u8>(pos, 8)?;
|
byte_index += USIZE_SIZE - 1;
|
||||||
pos += 8;
|
}
|
||||||
if byte == 0 {
|
} else {
|
||||||
return Ok(MaybeBorrowed::Owned);
|
let mut pos = position;
|
||||||
} else {
|
loop {
|
||||||
buffer.push(byte);
|
let byte = self.read_int::<u8>(pos, 8)?;
|
||||||
}
|
pos += 8;
|
||||||
|
if byte == 0 {
|
||||||
|
return Ok(());
|
||||||
|
} else {
|
||||||
|
buffer.push(byte);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -846,19 +875,7 @@ where
|
||||||
{
|
{
|
||||||
let type_bit_size = size_of::<T>() * 8;
|
let type_bit_size = size_of::<T>() * 8;
|
||||||
if position + type_bit_size + USIZE_BIT_SIZE > self.bit_len() {
|
if position + type_bit_size + USIZE_BIT_SIZE > self.bit_len() {
|
||||||
if position + type_bit_size > self.bit_len() {
|
self.bounds_check(position, type_bit_size)?;
|
||||||
if position > self.bit_len() {
|
|
||||||
return Err(BitError::IndexOutOfBounds {
|
|
||||||
pos: position,
|
|
||||||
size: self.bit_len(),
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
return Err(BitError::NotEnoughData {
|
|
||||||
requested: size_of::<T>() * 8,
|
|
||||||
bits_left: self.bit_len() - position,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(unsafe { self.read_float_unchecked(position, true) })
|
Ok(unsafe { self.read_float_unchecked(position, true) })
|
||||||
} else {
|
} else {
|
||||||
Ok(unsafe { self.read_float_unchecked(position, false) })
|
Ok(unsafe { self.read_float_unchecked(position, false) })
|
||||||
|
|
@ -884,12 +901,7 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn get_sub_buffer(&self, bit_len: usize) -> Result<Self> {
|
pub(crate) fn get_sub_buffer(&self, bit_len: usize) -> Result<Self> {
|
||||||
if bit_len > self.bit_len() {
|
self.bounds_check(0, bit_len)?;
|
||||||
return Err(BitError::NotEnoughData {
|
|
||||||
requested: bit_len,
|
|
||||||
bits_left: self.bit_len(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(BitReadBuffer {
|
Ok(BitReadBuffer {
|
||||||
bytes: self.bytes.clone(),
|
bytes: self.bytes.clone(),
|
||||||
|
|
@ -901,12 +913,7 @@ where
|
||||||
|
|
||||||
/// Truncate the buffer to a given bit length
|
/// Truncate the buffer to a given bit length
|
||||||
pub fn truncate(&mut self, bit_len: usize) -> Result<()> {
|
pub fn truncate(&mut self, bit_len: usize) -> Result<()> {
|
||||||
if bit_len > self.bit_len() {
|
self.bounds_check(bit_len, 0)?;
|
||||||
return Err(BitError::NotEnoughData {
|
|
||||||
requested: bit_len,
|
|
||||||
bits_left: self.bit_len(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
self.bit_len = bit_len;
|
self.bit_len = bit_len;
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue