1
0
Fork 0
mirror of https://codeberg.org/icewind/bitbuffer.git synced 2026-06-03 08:34:07 +02:00

drop read_*_into for now as it doesn't seem to help much for the complexity

This commit is contained in:
Robin Appelman 2025-07-14 20:06:45 +02:00
commit a07123315c
4 changed files with 13 additions and 350 deletions

View file

@ -121,26 +121,6 @@ fn perf_string<E: Endianness>((offset, buffer): (usize, BitReadBuffer<E>)) {
}
}
// Benchmark: repeatedly read null-terminated strings through `read_string_into`,
// sharing a single output `String` between all reads.
#[library_benchmark(setup = build_string_buffer)]
#[bench::le_alligned(0, LittleEndian)]
#[bench::be_alligned(0, BigEndian)]
#[bench::le_unalligned(3, LittleEndian)]
#[bench::be_unalligned(3, BigEndian)]
fn perf_string_into<E: Endianness>((offset, buffer): (usize, BitReadBuffer<E>)) {
    // Number of bits that must still fit before another read is attempted.
    const MIN_REMAINING_BITS: usize = 128 * 8;

    let total_bits = buffer.bit_len();
    // One scratch string, handed to every read.
    let mut scratch = String::new();
    let mut cursor = offset;
    while cursor + MIN_REMAINING_BITS <= total_bits {
        let read = buffer.read_string_into(cursor, None, &mut scratch).unwrap();
        let text = read.as_ref(&scratch);
        // Step over the string itself plus one more byte before the next read.
        cursor += (text.len() + 1) * 8;
        black_box(text);
    }
}
#[library_benchmark(setup = build_string_buffer)]
#[bench::le_alligned(0, LittleEndian)]
#[bench::be_alligned(0, BigEndian)]
@ -159,26 +139,6 @@ fn perf_bytes<E: Endianness>((offset, buffer): (usize, BitReadBuffer<E>)) {
}
}
// Benchmark: repeatedly read 128-byte chunks through `read_bytes_into`,
// sharing a single output `Vec` between all reads.
#[library_benchmark(setup = build_string_buffer)]
#[bench::le_alligned(0, LittleEndian)]
#[bench::be_alligned(0, BigEndian)]
#[bench::le_unalligned(3, LittleEndian)]
#[bench::be_unalligned(3, BigEndian)]
fn perf_bytes_into<E: Endianness>((offset, buffer): (usize, BitReadBuffer<E>)) {
    // Size of every read, in bytes.
    const CHUNK_BYTES: usize = 128;

    let total_bits = buffer.bit_len();
    // One scratch vector, handed to every read.
    let mut scratch = Vec::new();
    let mut cursor = offset;
    while cursor + CHUNK_BYTES * 8 <= total_bits {
        let read = buffer
            .read_bytes_into(cursor, CHUNK_BYTES, &mut scratch)
            .unwrap();
        let chunk = read.as_ref(&scratch);
        // NOTE(review): this advances one byte more than was read; presumably it
        // mirrors the stride of the string benchmark — confirm.
        cursor += (chunk.len() + 1) * 8;
        black_box(chunk);
    }
}
#[allow(dead_code)]
#[derive(BitRead)]
struct BasicStruct {
@ -230,14 +190,12 @@ library_benchmark_group!(
library_benchmark_group!(
name = bench_read_string;
compare_by_id = true;
benchmarks = perf_string, perf_string_into
benchmarks = perf_string
);
library_benchmark_group!(
name = bench_read_bytes;
compare_by_id = true;
benchmarks = perf_bytes, perf_bytes_into
benchmarks = perf_bytes
);
library_benchmark_group!(

View file

@ -75,7 +75,6 @@
#![warn(missing_docs)]
use std::borrow::Cow;
use thiserror::Error;
pub use bitbuffer_derive::{BitRead, BitReadSized, BitWrite, BitWriteSized};
@ -191,49 +190,3 @@ pub fn bit_size_of<'a, T: BitRead<'a, LittleEndian>>() -> Option<usize> {
pub fn bit_size_of_sized<'a, T: BitReadSized<'a, LittleEndian>>(size: usize) -> Option<usize> {
T::bit_size_sized(size)
}
/// A result that is either borrowed from the source buffer, or written into the provided output buffer
///
/// This is similar to `Cow` but allows reusing the owned allocation: the `Owned` variant
/// carries no data itself, it only signals that the data lives in the output buffer that
/// was handed to the read call.
///
/// The type is `Copy` and `Clone` for every payload type, including unsized ones such as
/// `str` and `[u8]`.
pub enum MaybeBorrowed<'a, B>
where
    B: 'a + ToOwned + ?Sized,
{
    /// Result borrowed from the source buffer
    Borrowed(&'a B),
    /// Marks that the result has been written into the provided output buffer
    Owned,
}

// `Clone` and `Copy` are written by hand on purpose: `#[derive(Copy, Clone)]` would add
// `B: Copy` / `B: Clone` bounds, which imply `B: Sized` and therefore exclude `str` and
// `[u8]`. The enum only ever holds a shared reference, which is always `Copy`.
impl<'a, B> Clone for MaybeBorrowed<'a, B>
where
    B: 'a + ToOwned + ?Sized,
{
    #[inline]
    fn clone(&self) -> Self {
        *self
    }
}

impl<'a, B> Copy for MaybeBorrowed<'a, B> where B: 'a + ToOwned + ?Sized {}
impl<'a, B: 'a + ToOwned + ?Sized> MaybeBorrowed<'a, B> {
/// Create a `Cow<'a, B>` from the `MaybeBorrowed<'a, B>` and the buffer containing the owned data.
pub fn into_cow(self, owned_buffer: B::Owned) -> Cow<'a, B> {
match self {
MaybeBorrowed::Borrowed(b) => Cow::Borrowed(b),
MaybeBorrowed::Owned => Cow::Owned(owned_buffer),
}
}
/// Get an owned copy of the data, either by copying the borrowed data or using the owned data.
pub fn to_owned(&self, owned_buffer: B::Owned) -> B::Owned {
match *self {
MaybeBorrowed::Borrowed(b) => b.to_owned(),
MaybeBorrowed::Owned => owned_buffer,
}
}
}
impl<'a, B> MaybeBorrowed<'a, B>
where
B: 'a + ToOwned + ?Sized,
B::Owned: AsRef<B>,
{
/// Get a reference from either the `MaybeBorrowed<'a, B>` or the buffer containing the owned data.
pub fn as_ref<'o>(&'o self, owned_buffer: &'o B::Owned) -> &'o B {
match self {
MaybeBorrowed::Borrowed(b) => b,
MaybeBorrowed::Owned => owned_buffer.as_ref(),
}
}
}

View file

@ -2,14 +2,14 @@ use std::cmp::min;
use std::fmt;
use std::fmt::Debug;
use std::marker::PhantomData;
use std::mem::{size_of, take};
use std::mem::size_of;
use std::ops::{BitOrAssign, BitXor, Index, Range, RangeFrom};
use num_traits::{Float, PrimInt, WrappingSub};
use crate::endianness::Endianness;
use crate::num_traits::{IsSigned, UncheckedPrimitiveFloat, UncheckedPrimitiveInt};
use crate::{BitError, MaybeBorrowed, Result};
use crate::{BitError, Result};
use std::borrow::{Borrow, Cow};
use std::convert::TryInto;
use std::rc::Rc;
@ -512,54 +512,6 @@ where
Ok(unsafe { self.read_bytes_unchecked(position, byte_count) })
}
/// Read a series of bytes from the buffer, reusing a caller-provided output buffer
///
/// If the `position` is a multiple of 8, the output will be borrowed, otherwise it will be written to the provided output buffer
///
/// See [`MaybeBorrowed`] for more information on dealing with the result.
///
/// # Errors
///
/// - [`ReadError::NotEnoughData`]: not enough bits available in the buffer
///
/// # Examples
///
/// ```
/// # use bitbuffer::{BitReadBuffer, LittleEndian, Result};
/// #
/// # fn main() -> Result<()> {
/// # let bytes = vec![
/// #     0b1011_0101, 0b0110_1010, 0b1010_1100, 0b1001_1001,
/// #     0b1001_1001, 0b1001_1001, 0b1001_1001, 0b1110_0111
/// # ];
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
/// let mut output = Vec::new();
/// let result = buffer.read_bytes_into(5, 3, &mut output)?;
/// assert_eq!(result.as_ref(&output), &[0b0_1010_101, 0b0_1100_011, 0b1_1001_101]);
/// output.clear();
/// let result = buffer.read_bytes_into(0, 8, &mut output)?;
/// assert_eq!(result.as_ref(&output), &[
///     0b1011_0101, 0b0110_1010, 0b1010_1100, 0b1001_1001,
///     0b1001_1001, 0b1001_1001, 0b1001_1001, 0b1110_0111
/// ]);
/// #
/// # Ok(())
/// # }
/// ```
///
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
#[inline]
pub fn read_bytes_into(
    &self,
    position: usize,
    byte_count: usize,
    output: &mut Vec<u8>,
) -> Result<MaybeBorrowed<'a, [u8]>> {
    // NOTE(review): this multiplication is unchecked; a huge `byte_count` would
    // overflow — confirm whether callers can pass untrusted lengths.
    let bit_count = byte_count * 8;
    self.bounds_check(position, bit_count)?;
    // SAFETY: `bounds_check` just succeeded for `bit_count` bits starting at `position`.
    let bytes = unsafe { self.read_bytes_unchecked_into(position, byte_count, output) };
    Ok(bytes)
}
#[inline(always)]
unsafe fn read_bytes_unchecked_owned(
&self,
@ -567,6 +519,9 @@ where
byte_count: usize,
output: &mut Vec<u8>,
) {
output.clear();
output.reserve(byte_count);
let shift = position & 7;
let mut byte_left = byte_count;
let mut read_pos = position / 8;
@ -612,28 +567,6 @@ where
Cow::Owned(output)
}
/// Read `byte_count` bytes starting at bit `position`, without a bounds check in this function.
///
/// When `position` is a multiple of 8 the bytes are borrowed from the source slice and
/// `output` is left untouched; otherwise `output` is cleared, filled with the bytes and
/// `MaybeBorrowed::Owned` is returned.
///
/// # Safety
///
/// NOTE(review): presumably the caller must have verified that `byte_count * 8` bits are
/// available at `position` (the checked `read_bytes_into` calls `bounds_check` first) — confirm.
#[doc(hidden)]
#[inline]
pub unsafe fn read_bytes_unchecked_into(
    &self,
    position: usize,
    byte_count: usize,
    output: &mut Vec<u8>,
) -> MaybeBorrowed<'a, [u8]> {
    if position & 7 != 0 {
        // Not on a byte boundary: the bytes have to be assembled into `output`.
        output.clear();
        output.reserve(byte_count);
        self.read_bytes_unchecked_owned(position, byte_count, output);
        return MaybeBorrowed::Owned;
    }

    // Byte aligned: hand out a view into the source data.
    let first_byte = position / 8;
    MaybeBorrowed::Borrowed(&self.slice[first_byte..first_byte + byte_count])
}
/// Read a series of bytes from the buffer as string.
///
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string.
@ -683,67 +616,6 @@ where
Ok(Cow::Owned(string))
}
/// Read a series of bytes from the buffer as string, reusing a caller-provided `String`.
///
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string.
///
/// If the `position` is a multiple of 8, the output will be borrowed and `output` is left
/// untouched, otherwise the string will be stored in the provided `String`, reusing its allocation.
///
/// # Errors
///
/// - [`ReadError::NotEnoughData`]: not enough bits available in the buffer
/// - [`ReadError::Utf8Error`]: the read bytes are not valid utf8
///
/// # Examples
///
/// ```
/// # use bitbuffer::{BitReadBuffer, BitReadStream, LittleEndian, Result};
/// #
/// # fn main() -> Result<()> {
/// # let bytes = vec![
/// #     0x48, 0x65, 0x6c, 0x6c,
/// #     0x6f, 0x20, 0x77, 0x6f,
/// #     0x72, 0x6c, 0x64, 0,
/// #     0,    0,    0,    0
/// # ];
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
/// let mut output = String::new();
/// // Fixed length string
/// let result = buffer.read_string_into(0, Some(11), &mut output)?;
/// assert_eq!(result.as_ref(&output), "Hello world");
/// // fixed length with null padding
/// let result = buffer.read_string_into(0, Some(16), &mut output)?;
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
/// // null terminated
/// let result = buffer.read_string_into(0, None, &mut output)?;
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
/// #
/// # Ok(())
/// # }
/// ```
///
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
#[inline]
pub fn read_string_into(
    &self,
    position: usize,
    byte_len: Option<usize>,
    output: &mut String,
) -> Result<MaybeBorrowed<'a, str>> {
    if position & 7 != 0 {
        // Unaligned: move the bytes of `output` into the unaligned read so its allocation
        // is passed along, then store the resulting string back into `output`.
        let scratch = take(output).into_bytes();
        *output = self.read_string_unaligned(position, byte_len, scratch)?;
        return Ok(MaybeBorrowed::Owned);
    }

    // Aligned: the string can be borrowed from the source data.
    let borrowed = self.read_string_aligned(position, byte_len)?;
    Ok(MaybeBorrowed::Borrowed(borrowed))
}
#[inline(always)]
fn read_string_unaligned(
&self,
@ -774,7 +646,7 @@ where
Some(byte_len) => {
self.bounds_check(position, byte_len * 8)?;
let bytes = unsafe { self.slice.get_unchecked(byte_pos..byte_pos + byte_len) };
str::from_utf8(bytes)
std::str::from_utf8(bytes)
.map_err(|err| BitError::Utf8Error(err, byte_len))?
.trim_end_matches(char::from(0))
}
@ -793,7 +665,7 @@ where
.unwrap_or(self.byte_len() - byte_pos);
let bytes = unsafe { self.slice.get_unchecked(byte_pos..byte_pos + byte_len) };
str::from_utf8(bytes).map_err(|err| BitError::Utf8Error(err, byte_len))?
std::str::from_utf8(bytes).map_err(|err| BitError::Utf8Error(err, byte_len))?
}
};

View file

@ -6,8 +6,8 @@ use num_traits::{Float, PrimInt, WrappingSub};
use crate::endianness::Endianness;
use crate::num_traits::{IsSigned, UncheckedPrimitiveFloat, UncheckedPrimitiveInt};
use crate::readbuffer::Data;
use crate::BitReadBuffer;
use crate::{BitError, BitRead, BitReadSized, Result};
use crate::{BitReadBuffer, MaybeBorrowed};
use std::borrow::Cow;
use std::cmp::min;
@ -251,51 +251,6 @@ where
result
}
/// Read a series of bytes from the stream, reusing a caller-provided output buffer
///
/// If the current read position is on a byte boundary, the output will be borrowed, otherwise it will be written to the provided output buffer
///
/// The read position only advances when the read succeeds.
///
/// # Errors
///
/// - [`ReadError::NotEnoughData`]: not enough bits available in the stream
///
/// # Examples
///
/// ```
/// # use bitbuffer::{BitReadBuffer, BitReadStream, LittleEndian, Result};
/// #
/// # fn main() -> Result<()> {
/// # use std::borrow::Borrow;
/// # let bytes = vec![
/// #     0b1011_0101, 0b0110_1010, 0b1010_1100, 0b1001_1001,
/// #     0b1001_1001, 0b1001_1001, 0b1001_1001, 0b1110_0111
/// # ];
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
/// # let mut stream = BitReadStream::new(buffer);
/// let mut output = Vec::new();
/// let result = stream.read_bytes_into(3, &mut output)?;
/// assert_eq!(result.as_ref(&output), &[0b1011_0101, 0b0110_1010, 0b1010_1100]);
/// assert_eq!(stream.pos(), 24);
/// #
/// # Ok(())
/// # }
/// ```
///
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
#[inline]
pub fn read_bytes_into(
    &mut self,
    byte_count: usize,
    output: &mut Vec<u8>,
) -> Result<MaybeBorrowed<'a, [u8]>> {
    let bits_read = byte_count * 8;
    let bytes = self.buffer.read_bytes_into(self.pos, byte_count, output)?;
    // Reached only on success, so a failed read leaves the position untouched.
    self.pos += bits_read;
    Ok(bytes)
}
#[doc(hidden)]
#[inline]
pub unsafe fn read_bytes_unchecked(&mut self, byte_count: usize) -> Cow<'a, [u8]> {
@ -305,21 +260,6 @@ where
result
}
/// Read `byte_count` bytes at the current position through the buffer's
/// `read_bytes_unchecked_into`, then advance the position by `byte_count * 8` bits.
///
/// # Safety
///
/// NOTE(review): no bounds check is performed here; presumably the caller must guarantee
/// that `byte_count` bytes are still available in the stream — confirm.
#[doc(hidden)]
#[inline]
pub unsafe fn read_bytes_unchecked_into(
    &mut self,
    byte_count: usize,
    output: &mut Vec<u8>,
) -> MaybeBorrowed<'a, [u8]> {
    let bits_read = byte_count * 8;
    let start = self.pos;
    let bytes = self
        .buffer
        .read_bytes_unchecked_into(start, byte_count, output);
    self.pos += bits_read;
    bytes
}
/// Read a series of bytes from the stream as utf8 string
///
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string
@ -366,76 +306,17 @@ where
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
#[inline]
pub fn read_string(&mut self, byte_len: Option<usize>) -> Result<Cow<'a, str>> {
    // Fresh scratch string for `read_string_into`; `into_cow` turns it into the owned
    // value of the `Cow` when the result is not borrowed.
    let mut scratch = String::new();
    match self.read_string_into(byte_len, &mut scratch) {
        Ok(read) => Ok(read.into_cow(scratch)),
        Err(err) => Err(err),
    }
}
/// Read a series of bytes from the stream as utf8 string
///
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string
///
/// If the current read position is on a byte boundary, the output will be borrowed, otherwise it will be written to the provided output String
///
/// # Errors
///
/// - [`ReadError::NotEnoughData`]: not enough bits available in the stream
/// - [`ReadError::Utf8Error`]: the read bytes are not valid utf8
///
/// # Examples
///
/// ```
/// # use bitbuffer::{BitReadBuffer, BitReadStream, LittleEndian, Result};
/// #
/// # fn main() -> Result<()> {
/// # let bytes = vec![
/// # 0x48, 0x65, 0x6c, 0x6c,
/// # 0x6f, 0x20, 0x77, 0x6f,
/// # 0x72, 0x6c, 0x64, 0,
/// # 0, 0, 0, 0
/// # ];
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
/// # let mut stream = BitReadStream::new(buffer);
/// // Fixed length string
/// stream.set_pos(0);
/// let mut output = String::new();
/// let result = stream.read_string_into(Some(11), &mut output)?;
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
/// assert_eq!(11 * 8, stream.pos());
/// // fixed length with null padding
/// stream.set_pos(0);
/// let result = stream.read_string_into(Some(16), &mut output)?;
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
/// assert_eq!(16 * 8, stream.pos());
/// // null terminated
/// stream.set_pos(0);
/// let result = stream.read_string_into(None, &mut output)?;
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
/// assert_eq!(12 * 8, stream.pos()); // 1 more for the terminating null byte
/// #
/// # Ok(())
/// # }
/// ```
///
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
#[inline]
pub fn read_string_into(
&mut self,
byte_len: Option<usize>,
output: &mut String,
) -> Result<MaybeBorrowed<'a, str>> {
let max_length = self.bits_left() / 8;
let result = self
.buffer
.read_string_into(self.pos, byte_len, output)
.read_string(self.pos, byte_len)
.map_err(|mut err| {
// still advance the stream on malformed utf8
if let BitError::Utf8Error(_, len) = &mut err {
self.pos += match byte_len {
Some(len) => len * 8,
None => min((*len + 1) * 8, max_length * 8),
None => min(*len + 1, max_length) * 8,
};
*len = (*len).min(max_length);
@ -443,10 +324,9 @@ where
err
})?;
let str = result.as_ref(output);
let read = match byte_len {
Some(len) => len * 8,
None => (str.len() + 1).min(max_length) * 8,
None => (result.len() + 1).min(max_length) * 8,
};
self.pos += read;