1
0
Fork 0
mirror of https://codeberg.org/icewind/bitbuffer.git synced 2026-06-03 16:44:06 +02:00

add read_*_into methods

This commit is contained in:
Robin Appelman 2025-07-13 22:07:12 +02:00
commit 774d820aaf
4 changed files with 393 additions and 47 deletions

View file

@ -75,6 +75,7 @@
#![warn(missing_docs)]
use std::borrow::Cow;
use thiserror::Error;
pub use bitbuffer_derive::{BitRead, BitReadSized, BitWrite, BitWriteSized};
@ -190,3 +191,49 @@ pub fn bit_size_of<'a, T: BitRead<'a, LittleEndian>>() -> Option<usize> {
pub fn bit_size_of_sized<'a, T>(size: usize) -> Option<usize>
where
    T: BitReadSized<'a, LittleEndian>,
{
    // Pure forwarding to the trait's associated function, with the endianness pinned to
    // `LittleEndian`; whatever `T` reports for `size` is returned unchanged.
    <T as BitReadSized<'a, LittleEndian>>::bit_size_sized(size)
}
/// A result that is either borrowed from the source buffer, or written into the provided output buffer.
///
/// This is similar to `Cow`, but instead of carrying the owned data itself it only records
/// that the data lives in the caller-provided buffer, which allows reusing the owned allocation.
///
/// `Clone` and `Copy` are implemented by hand instead of derived: the derive would add a
/// `B: Clone` bound, which unsized payloads such as `str` and `[u8]` can never satisfy.
#[derive(Debug)]
pub enum MaybeBorrowed<'a, B>
where
    B: 'a + ToOwned + ?Sized,
{
    /// Result borrowed from the source buffer.
    Borrowed(&'a B),
    /// Marks that the result has been written into the provided output buffer.
    Owned,
}

impl<'a, B> Clone for MaybeBorrowed<'a, B>
where
    B: 'a + ToOwned + ?Sized,
{
    #[inline]
    fn clone(&self) -> Self {
        // The only payload is a shared reference, so a bitwise copy is a full clone.
        *self
    }
}

impl<'a, B> Copy for MaybeBorrowed<'a, B> where B: 'a + ToOwned + ?Sized {}
impl<'a, B: 'a + ToOwned + ?Sized> MaybeBorrowed<'a, B> {
/// Create a `Cow<'a, B>` from the `MaybeBorrowed<'a, B>` and the buffer containing the owned data.
pub fn into_cow(self, owned_buffer: B::Owned) -> Cow<'a, B> {
match self {
MaybeBorrowed::Borrowed(b) => Cow::Borrowed(b),
MaybeBorrowed::Owned => Cow::Owned(owned_buffer),
}
}
/// Get an owned copy of the data, either by copying the borrowed data or using the owned data.
pub fn to_owned(&self, owned_buffer: B::Owned) -> B::Owned {
match *self {
MaybeBorrowed::Borrowed(b) => b.to_owned(),
MaybeBorrowed::Owned => owned_buffer,
}
}
}
impl<'a, B> MaybeBorrowed<'a, B>
where
B: 'a + ToOwned + ?Sized,
B::Owned: AsRef<B>,
{
/// Get a reference from either the `MaybeBorrowed<'a, B>` or the buffer containing the owned data.
pub fn as_ref<'o>(&'o self, owned_buffer: &'o B::Owned) -> &'o B {
match self {
MaybeBorrowed::Borrowed(b) => b,
MaybeBorrowed::Owned => owned_buffer.as_ref(),
}
}
}

View file

@ -2,14 +2,14 @@ use std::cmp::min;
use std::fmt;
use std::fmt::Debug;
use std::marker::PhantomData;
use std::mem::size_of;
use std::mem::{size_of, take};
use std::ops::{BitOrAssign, BitXor, Index, Range, RangeFrom};
use num_traits::{Float, PrimInt, WrappingSub};
use crate::endianness::Endianness;
use crate::num_traits::{IsSigned, UncheckedPrimitiveFloat, UncheckedPrimitiveInt};
use crate::{BitError, Result};
use crate::{BitError, MaybeBorrowed, Result};
use std::borrow::{Borrow, Cow};
use std::convert::TryInto;
use std::rc::Rc;
@ -471,6 +471,8 @@ where
/// Read a series of bytes from the buffer
///
/// If the `position` is a multiple of 8, the output will be borrowed, otherwise it will be copied into a new Vec.
///
/// # Errors
///
/// - [`ReadError::NotEnoughData`]: not enough bits available in the buffer
@ -499,6 +501,54 @@ where
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
#[inline]
pub fn read_bytes(&self, position: usize, byte_count: usize) -> Result<Cow<'a, [u8]>> {
    // Scratch buffer for `read_bytes_into`; it only ends up in the result when the read
    // reports `MaybeBorrowed::Owned`.
    let mut scratch = Vec::new();
    self.read_bytes_into(position, byte_count, &mut scratch)
        .map(|outcome| outcome.into_cow(scratch))
}
/// Read a series of bytes from the buffer, using an existing buffer
///
/// If the `position` is a multiple of 8, the output will be borrowed, otherwise it will be written to the provided output buffer
///
/// See [`MaybeBorrowed`] for more information on dealing with the result.
///
/// # Errors
///
/// - [`ReadError::NotEnoughData`]: not enough bits available in the buffer
///
/// # Examples
///
/// ```
/// # use bitbuffer::{BitReadBuffer, LittleEndian, Result};
/// #
/// # fn main() -> Result<()> {
/// # let bytes = vec![
/// # 0b1011_0101, 0b0110_1010, 0b1010_1100, 0b1001_1001,
/// # 0b1001_1001, 0b1001_1001, 0b1001_1001, 0b1110_0111
/// # ];
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
/// let mut output = Vec::new();
/// let result = buffer.read_bytes_into(5, 3, &mut output)?;
/// assert_eq!(result.as_ref(&output), &[0b0_1010_101, 0b0_1100_011, 0b1_1001_101]);
/// output.clear();
/// let result = buffer.read_bytes_into(0, 8, &mut output)?;
/// assert_eq!(result.as_ref(&output), &[
/// 0b1011_0101, 0b0110_1010, 0b1010_1100, 0b1001_1001,
/// 0b1001_1001, 0b1001_1001, 0b1001_1001, 0b1110_0111
/// ]);
/// #
/// # Ok(())
/// # }
/// ```
///
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
#[inline]
pub fn read_bytes_into(
&self,
position: usize,
byte_count: usize,
output: &mut Vec<u8>,
) -> Result<MaybeBorrowed<'a, [u8]>> {
if position + byte_count * 8 > self.bit_len() {
if position > self.bit_len() {
return Err(BitError::IndexOutOfBounds {
@ -513,20 +563,34 @@ where
}
}
Ok(unsafe { self.read_bytes_unchecked(position, byte_count) })
Ok(unsafe { self.read_bytes_unchecked_into(position, byte_count, output) })
}
#[doc(hidden)]
#[inline]
pub unsafe fn read_bytes_unchecked(&self, position: usize, byte_count: usize) -> Cow<'a, [u8]> {
    // Delegate to the `_into` variant with a throwaway buffer and fold the marker plus that
    // buffer back into a `Cow`.
    let mut scratch = Vec::new();
    let outcome = self.read_bytes_unchecked_into(position, byte_count, &mut scratch);
    outcome.into_cow(scratch)
}
#[doc(hidden)]
#[inline]
pub unsafe fn read_bytes_unchecked_into(
&self,
position: usize,
byte_count: usize,
output: &mut Vec<u8>,
) -> MaybeBorrowed<'a, [u8]> {
let shift = position & 7;
if shift == 0 {
let byte_pos = position / 8;
return Cow::Borrowed(&self.slice[byte_pos..byte_pos + byte_count]);
return MaybeBorrowed::Borrowed(&self.slice[byte_pos..byte_pos + byte_count]);
}
let mut data = Vec::with_capacity(byte_count);
output.clear();
output.reserve(byte_count);
let mut byte_left = byte_count;
let mut read_pos = position / 8;
@ -536,7 +600,7 @@ where
let bytes = raw.to_le_bytes();
let read_bytes = USIZE_SIZE - 1;
let usable_bytes = &bytes[0..read_bytes];
data.extend_from_slice(usable_bytes);
output.extend_from_slice(usable_bytes);
read_pos += read_bytes;
byte_left -= read_bytes;
@ -544,22 +608,24 @@ where
let bytes = self.read_shifted_usize(read_pos, shift, true).to_le_bytes();
let usable_bytes = &bytes[0..byte_left];
data.extend_from_slice(usable_bytes);
output.extend_from_slice(usable_bytes);
} else {
let mut pos = position;
while byte_left > 0 {
data.push(self.read_int_unchecked::<u8>(pos, 8, true));
output.push(self.read_int_unchecked::<u8>(pos, 8, true));
byte_left -= 1;
pos += 8;
}
}
Cow::Owned(data)
MaybeBorrowed::Owned
}
/// Read a series of bytes from the buffer as string
/// Read a series of bytes from the buffer as string.
///
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string.
///
/// If the `position` is a multiple of 8, the output will be borrowed, otherwise it will be copied into a new String.
///
/// # Errors
///
@ -580,7 +646,7 @@ where
/// # ];
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
/// // Fixed length string
/// assert_eq!(buffer.read_string(0, Some(13))?, "Hello world".to_owned());
/// assert_eq!(buffer.read_string(0, Some(11))?, "Hello world".to_owned());
/// // fixed length with null padding
/// assert_eq!(buffer.read_string(0, Some(16))?, "Hello world".to_owned());
/// // null terminated
@ -594,34 +660,92 @@ where
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
#[inline]
pub fn read_string(&self, position: usize, byte_len: Option<usize>) -> Result<Cow<'a, str>> {
    // Scratch string for `read_string_into`; it only ends up in the result when the read
    // reports `MaybeBorrowed::Owned`.
    let mut scratch = String::new();
    self.read_string_into(position, byte_len, &mut scratch)
        .map(|outcome| outcome.into_cow(scratch))
}
/// Read a series of bytes from the buffer as string, using an existing buffer.
///
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string.
///
/// If the `position` is a multiple of 8, the output will be borrowed, otherwise it will be copied into the provided String.
///
/// # Errors
///
/// - [`ReadError::NotEnoughData`]: not enough bits available in the buffer
/// - [`ReadError::Utf8Error`]: the read bytes are not valid utf8
///
/// # Examples
///
/// ```
/// # use bitbuffer::{BitReadBuffer, BitReadStream, LittleEndian, Result};
/// #
/// # fn main() -> Result<()> {
/// # let bytes = vec![
/// # 0x48, 0x65, 0x6c, 0x6c,
/// # 0x6f, 0x20, 0x77, 0x6f,
/// # 0x72, 0x6c, 0x64, 0,
/// # 0, 0, 0, 0
/// # ];
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
/// let mut output = String::new();
/// // Fixed length string
/// let result = buffer.read_string_into(0, Some(11), &mut output)?;
/// assert_eq!(result.as_ref(&output), "Hello world");
/// // fixed length with null padding
/// let result = buffer.read_string_into(0, Some(16), &mut output)?;
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
/// // null terminated
/// let result = buffer.read_string_into(0, None, &mut output)?;
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
/// #
/// # Ok(())
/// # }
/// ```
///
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
#[inline]
pub fn read_string_into(
&self,
position: usize,
byte_len: Option<usize>,
output: &mut String,
) -> Result<MaybeBorrowed<'a, str>> {
// Move the caller's String out (leaving `output` empty) so that its allocation can be
// filled as raw bytes before being validated as utf8.
let mut taken_output = take(output).into_bytes();
match byte_len {
// Fixed length: read exactly `byte_len` bytes and strip trailing null padding.
Some(byte_len) => {
let bytes = self.read_bytes(position, byte_len)?;
let bytes = self.read_bytes_into(position, byte_len, &mut taken_output)?;
let string = match bytes {
Cow::Owned(bytes) => Cow::Owned(
String::from_utf8(bytes)?
let result = match bytes {
// NOTE(review): the trimmed result is rebuilt with `to_string()`, so the String stored
// back into `output` is a new allocation rather than the buffer that was just filled;
// if `from_utf8` fails, `output` is left empty.
MaybeBorrowed::Owned => {
*output = String::from_utf8(taken_output)?
.trim_end_matches(char::from(0))
.to_string(),
),
Cow::Borrowed(bytes) => Cow::Borrowed(
.to_string();
MaybeBorrowed::Owned
}
// NOTE(review): nothing is written back on this path, so `output` stays empty and
// `taken_output` (the caller's original allocation) is dropped.
MaybeBorrowed::Borrowed(bytes) => MaybeBorrowed::Borrowed(
std::str::from_utf8(bytes)
.map_err(|err| BitError::Utf8Error(err, bytes.len()))?
.trim_end_matches(char::from(0)),
),
};
Ok(string)
Ok(result)
}
// No length given: read up to the terminating null byte via `read_string_bytes`.
None => {
let bytes = self.read_string_bytes(position)?;
let string = match bytes {
Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes)?),
Cow::Borrowed(bytes) => Cow::Borrowed(
let bytes = self.read_string_bytes(position, &mut taken_output)?;
let result = match bytes {
// The filled byte buffer is validated and moved back into `output` as-is.
MaybeBorrowed::Owned => {
*output = String::from_utf8(taken_output)?;
MaybeBorrowed::Owned
}
// NOTE(review): as in the fixed length case, `output` is left empty on the borrowed path.
MaybeBorrowed::Borrowed(bytes) => MaybeBorrowed::Borrowed(
std::str::from_utf8(bytes)
.map_err(|err| BitError::Utf8Error(err, bytes.len()))?,
),
};
Ok(string)
Ok(result)
}
}
}
@ -634,15 +758,20 @@ where
}
#[inline]
fn read_string_bytes(&self, position: usize) -> Result<Cow<'a, [u8]>> {
fn read_string_bytes<'output>(
&self,
position: usize,
buffer: &'output mut Vec<u8>,
) -> Result<MaybeBorrowed<'a, [u8]>> {
let shift = position & 7;
if shift == 0 {
let byte_index = position / 8;
Ok(Cow::Borrowed(
Ok(MaybeBorrowed::Borrowed(
&self.slice[byte_index..self.find_null_byte(byte_index)],
))
} else {
let mut acc = Vec::with_capacity(32);
buffer.clear();
buffer.reserve(64);
if E::is_le() {
let mut byte_index = position / 8;
loop {
@ -661,13 +790,13 @@ where
if has_null {
for i in 0..USIZE_SIZE - 1 {
if usable_bytes[i] == 0 {
acc.extend_from_slice(&usable_bytes[0..i]);
return Ok(Cow::Owned(acc));
buffer.extend_from_slice(&usable_bytes[0..i]);
return Ok(MaybeBorrowed::Owned);
}
}
}
acc.extend_from_slice(&usable_bytes[0..USIZE_SIZE - 1]);
buffer.extend_from_slice(&usable_bytes[0..USIZE_SIZE - 1]);
byte_index += USIZE_SIZE - 1;
}
@ -677,9 +806,9 @@ where
let byte = self.read_int::<u8>(pos, 8)?;
pos += 8;
if byte == 0 {
return Ok(Cow::Owned(acc));
return Ok(MaybeBorrowed::Owned);
} else {
acc.push(byte);
buffer.push(byte);
}
}
}

View file

@ -6,8 +6,8 @@ use num_traits::{Float, PrimInt, WrappingSub};
use crate::endianness::Endianness;
use crate::num_traits::{IsSigned, UncheckedPrimitiveFloat, UncheckedPrimitiveInt};
use crate::readbuffer::Data;
use crate::BitReadBuffer;
use crate::{BitError, BitRead, BitReadSized, Result};
use crate::{BitReadBuffer, MaybeBorrowed};
use std::borrow::Cow;
use std::cmp::min;
@ -214,6 +214,8 @@ where
/// Read a series of bytes from the stream
///
/// If the current read position is on a byte boundary, the output will be borrowed, otherwise it will be copied into a new Vec
///
/// # Errors
///
/// - [`ReadError::NotEnoughData`]: not enough bits available in the stream
@ -249,6 +251,51 @@ where
result
}
/// Read a series of bytes from the stream, using an existing buffer
///
/// If the current read position is on a byte boundary, the output will be borrowed, otherwise it will be written to the provided output buffer
///
/// The read position of the stream is only advanced when the read succeeds.
///
/// # Errors
///
/// - [`ReadError::NotEnoughData`]: not enough bits available in the stream
///
/// # Examples
///
/// ```
/// # use bitbuffer::{BitReadBuffer, BitReadStream, LittleEndian, Result};
/// #
/// # fn main() -> Result<()> {
/// # let bytes = vec![
/// #     0b1011_0101, 0b0110_1010, 0b1010_1100, 0b1001_1001,
/// #     0b1001_1001, 0b1001_1001, 0b1001_1001, 0b1110_0111
/// # ];
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
/// # let mut stream = BitReadStream::new(buffer);
/// let mut output = Vec::new();
/// let result = stream.read_bytes_into(3, &mut output)?;
/// assert_eq!(result.as_ref(&output), &[0b1011_0101, 0b0110_1010, 0b1010_1100]);
/// assert_eq!(stream.pos(), 24);
/// #
/// # Ok(())
/// # }
/// ```
///
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
#[inline]
pub fn read_bytes_into(
    &mut self,
    byte_count: usize,
    output: &mut Vec<u8>,
) -> Result<MaybeBorrowed<'a, [u8]>> {
    let advance = byte_count * 8;
    // A failed read returns early here, leaving the position untouched.
    let read = self.buffer.read_bytes_into(self.pos, byte_count, output)?;
    self.pos += advance;
    Ok(read)
}
#[doc(hidden)]
#[inline]
pub unsafe fn read_bytes_unchecked(&mut self, byte_count: usize) -> Cow<'a, [u8]> {
@ -258,10 +305,27 @@ where
result
}
#[doc(hidden)]
#[inline]
pub unsafe fn read_bytes_unchecked_into(
    &mut self,
    byte_count: usize,
    output: &mut Vec<u8>,
) -> MaybeBorrowed<'a, [u8]> {
    // NOTE(review): no bounds check happens here; presumably the caller has to guarantee that
    // `byte_count` bytes are available at the current position - confirm against the buffer's
    // `read_bytes_unchecked_into`.
    let advance = byte_count * 8;
    let start = self.pos;
    let read = self
        .buffer
        .read_bytes_unchecked_into(start, byte_count, output);
    // Unlike the checked variant the position is advanced unconditionally.
    self.pos += advance;
    read
}
/// Read a series of bytes from the stream as utf8 string
///
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string
///
/// If the current read position is on a byte boundary, the output will be borrowed, otherwise it will be copied into a new String
///
/// # Errors
///
/// - [`ReadError::NotEnoughData`]: not enough bits available in the stream
@ -302,11 +366,70 @@ where
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
#[inline]
pub fn read_string(&mut self, byte_len: Option<usize>) -> Result<Cow<'a, str>> {
    // Scratch string for `read_string_into`; it only ends up in the result when the read
    // reports `MaybeBorrowed::Owned`.
    let mut scratch = String::new();
    self.read_string_into(byte_len, &mut scratch)
        .map(|outcome| outcome.into_cow(scratch))
}
/// Read a series of bytes from the stream as utf8 string, using an existing buffer
///
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string
///
/// If the current read position is on a byte boundary, the output will be borrowed, otherwise it will be written to the provided output String
///
/// # Errors
///
/// - [`ReadError::NotEnoughData`]: not enough bits available in the stream
/// - [`ReadError::Utf8Error`]: the read bytes are not valid utf8
///
/// # Examples
///
/// ```
/// # use bitbuffer::{BitReadBuffer, BitReadStream, LittleEndian, Result};
/// #
/// # fn main() -> Result<()> {
/// # let bytes = vec![
/// # 0x48, 0x65, 0x6c, 0x6c,
/// # 0x6f, 0x20, 0x77, 0x6f,
/// # 0x72, 0x6c, 0x64, 0,
/// # 0, 0, 0, 0
/// # ];
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
/// # let mut stream = BitReadStream::new(buffer);
/// // Fixed length string
/// stream.set_pos(0);
/// let mut output = String::new();
/// let result = stream.read_string_into(Some(11), &mut output)?;
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
/// assert_eq!(11 * 8, stream.pos());
/// // fixed length with null padding
/// stream.set_pos(0);
/// let result = stream.read_string_into(Some(16), &mut output)?;
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
/// assert_eq!(16 * 8, stream.pos());
/// // null terminated
/// stream.set_pos(0);
/// let result = stream.read_string_into(None, &mut output)?;
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
/// assert_eq!(12 * 8, stream.pos()); // 1 more for the terminating null byte
/// #
/// # Ok(())
/// # }
/// ```
///
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
#[inline]
pub fn read_string_into(
&mut self,
byte_len: Option<usize>,
output: &mut String,
) -> Result<MaybeBorrowed<'a, str>> {
let max_length = self.bits_left() / 8;
let result = self
.buffer
.read_string(self.pos, byte_len)
.read_string_into(self.pos, byte_len, output)
.map_err(|mut err| {
// still advance the stream on malformed utf8
if let BitError::Utf8Error(_, len) = &mut err {
@ -319,25 +442,32 @@ where
}
err
})?;
let str = result.as_ref(output);
let read = match byte_len {
Some(len) => len * 8,
None => (result.len() + 1) * 8,
None => (str.len() + 1) * 8,
};
// due to how sub buffer/streams work, the result string can be longer than the current stream
// (but not the top level buffer)
// thus we trim the resulting string to make sure it fits in the source stream
if read > self.bits_left() {
// find the maximum well-formed utf8 string that fits in max_len
let mut acc = String::with_capacity(max_length);
for c in result.chars() {
if acc.len() + c.len_utf8() > max_length {
break;
}
acc.push(c);
let mut target_length = str.len().min(max_length);
// find the closest char boundary
while !str.is_char_boundary(target_length) {
target_length -= 1;
}
self.pos += acc.len() * 8;
return Ok(Cow::Owned(acc));
let trimmed = match result {
MaybeBorrowed::Owned => {
output.truncate(target_length);
MaybeBorrowed::Owned
}
MaybeBorrowed::Borrowed(s) => MaybeBorrowed::Borrowed(&s[0..target_length]),
};
self.pos += target_length * 8;
return Ok(trimmed);
}
self.pos += read;
Ok(result)