mirror of
https://codeberg.org/icewind/bitbuffer.git
synced 2026-06-03 16:44:06 +02:00
add read_*_into methods
This commit is contained in:
parent
25a28172ad
commit
774d820aaf
4 changed files with 393 additions and 47 deletions
|
|
@ -121,6 +121,26 @@ fn perf_string<E: Endianness>((offset, buffer): (usize, BitReadBuffer<E>)) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[library_benchmark(setup = build_string_buffer)]
|
||||||
|
#[bench::le_alligned(0, LittleEndian)]
|
||||||
|
#[bench::be_alligned(0, BigEndian)]
|
||||||
|
#[bench::le_unalligned(3, LittleEndian)]
|
||||||
|
#[bench::be_unalligned(3, BigEndian)]
|
||||||
|
fn perf_string_into<E: Endianness>((offset, buffer): (usize, BitReadBuffer<E>)) {
|
||||||
|
let mut pos = offset;
|
||||||
|
let len = buffer.bit_len();
|
||||||
|
let mut buff = String::new();
|
||||||
|
loop {
|
||||||
|
if pos + (128 * 8) > len {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let result = buffer.read_string_into(pos, None, &mut buff).unwrap();
|
||||||
|
let result = result.as_ref(&buff);
|
||||||
|
pos += (result.len() + 1) * 8;
|
||||||
|
black_box(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[library_benchmark(setup = build_string_buffer)]
|
#[library_benchmark(setup = build_string_buffer)]
|
||||||
#[bench::le_alligned(0, LittleEndian)]
|
#[bench::le_alligned(0, LittleEndian)]
|
||||||
#[bench::be_alligned(0, BigEndian)]
|
#[bench::be_alligned(0, BigEndian)]
|
||||||
|
|
@ -139,6 +159,26 @@ fn perf_bytes<E: Endianness>((offset, buffer): (usize, BitReadBuffer<E>)) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[library_benchmark(setup = build_string_buffer)]
|
||||||
|
#[bench::le_alligned(0, LittleEndian)]
|
||||||
|
#[bench::be_alligned(0, BigEndian)]
|
||||||
|
#[bench::le_unalligned(3, LittleEndian)]
|
||||||
|
#[bench::be_unalligned(3, BigEndian)]
|
||||||
|
fn perf_bytes_into<E: Endianness>((offset, buffer): (usize, BitReadBuffer<E>)) {
|
||||||
|
let mut pos = offset;
|
||||||
|
let len = buffer.bit_len();
|
||||||
|
let mut buff = Vec::new();
|
||||||
|
loop {
|
||||||
|
if pos + (128 * 8) > len {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
let result = buffer.read_bytes_into(pos, 128, &mut buff).unwrap();
|
||||||
|
let result = result.as_ref(&buff);
|
||||||
|
pos += (result.len() + 1) * 8;
|
||||||
|
black_box(result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
#[derive(BitRead)]
|
#[derive(BitRead)]
|
||||||
struct BasicStruct {
|
struct BasicStruct {
|
||||||
|
|
@ -191,13 +231,13 @@ library_benchmark_group!(
|
||||||
library_benchmark_group!(
|
library_benchmark_group!(
|
||||||
name = bench_read_string;
|
name = bench_read_string;
|
||||||
compare_by_id = true;
|
compare_by_id = true;
|
||||||
benchmarks = perf_string
|
benchmarks = perf_string, perf_string_into
|
||||||
);
|
);
|
||||||
|
|
||||||
library_benchmark_group!(
|
library_benchmark_group!(
|
||||||
name = bench_read_bytes;
|
name = bench_read_bytes;
|
||||||
compare_by_id = true;
|
compare_by_id = true;
|
||||||
benchmarks = perf_bytes
|
benchmarks = perf_bytes, perf_bytes_into
|
||||||
);
|
);
|
||||||
|
|
||||||
library_benchmark_group!(
|
library_benchmark_group!(
|
||||||
|
|
|
||||||
47
src/lib.rs
47
src/lib.rs
|
|
@ -75,6 +75,7 @@
|
||||||
|
|
||||||
#![warn(missing_docs)]
|
#![warn(missing_docs)]
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
pub use bitbuffer_derive::{BitRead, BitReadSized, BitWrite, BitWriteSized};
|
pub use bitbuffer_derive::{BitRead, BitReadSized, BitWrite, BitWriteSized};
|
||||||
|
|
@ -190,3 +191,49 @@ pub fn bit_size_of<'a, T: BitRead<'a, LittleEndian>>() -> Option<usize> {
|
||||||
pub fn bit_size_of_sized<'a, T: BitReadSized<'a, LittleEndian>>(size: usize) -> Option<usize> {
|
pub fn bit_size_of_sized<'a, T: BitReadSized<'a, LittleEndian>>(size: usize) -> Option<usize> {
|
||||||
T::bit_size_sized(size)
|
T::bit_size_sized(size)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A result that is either borrowed from the source buffer, or written into the provided output buffer
///
/// This is similar to `Cow` but allows reusing the owned allocation: the `Owned` variant
/// carries no data, the caller keeps ownership of the output buffer and passes it back
/// to [`into_cow`](MaybeBorrowed::into_cow), [`to_owned`](MaybeBorrowed::to_owned) or
/// [`as_ref`](MaybeBorrowed::as_ref) to get at the result.
///
/// The type is `Copy` for every `B`, including unsized targets such as `str` and `[u8]`,
/// because it only ever holds a shared reference.
#[derive(Debug)]
pub enum MaybeBorrowed<'a, B>
where
    B: 'a + ToOwned + ?Sized,
{
    /// Result borrowed from the source buffer
    Borrowed(&'a B),
    /// Marks that the result has been written into the provided output buffer.
    Owned,
}

// `Clone`/`Copy` are implemented by hand: `#[derive]` would add `B: Clone`/`B: Copy`
// bounds, which `str` and `[u8]` can never satisfy even though `&B` is always `Copy`.
impl<'a, B> Clone for MaybeBorrowed<'a, B>
where
    B: 'a + ToOwned + ?Sized,
{
    #[inline]
    fn clone(&self) -> Self {
        *self
    }
}

impl<'a, B> Copy for MaybeBorrowed<'a, B> where B: 'a + ToOwned + ?Sized {}

impl<'a, B: 'a + ToOwned + ?Sized> MaybeBorrowed<'a, B> {
    /// Create a `Cow<'a, B>` from the `MaybeBorrowed<'a, B>` and the buffer containing the owned data.
    ///
    /// `owned_buffer` is only used (and otherwise dropped) when the result is `Owned`.
    pub fn into_cow(self, owned_buffer: B::Owned) -> Cow<'a, B> {
        match self {
            MaybeBorrowed::Borrowed(b) => Cow::Borrowed(b),
            MaybeBorrowed::Owned => Cow::Owned(owned_buffer),
        }
    }

    /// Get an owned copy of the data, either by copying the borrowed data or using the owned data.
    ///
    /// `owned_buffer` is returned as-is when the result is `Owned` and dropped otherwise.
    pub fn to_owned(&self, owned_buffer: B::Owned) -> B::Owned {
        match *self {
            MaybeBorrowed::Borrowed(b) => b.to_owned(),
            MaybeBorrowed::Owned => owned_buffer,
        }
    }
}

impl<'a, B> MaybeBorrowed<'a, B>
where
    B: 'a + ToOwned + ?Sized,
    B::Owned: AsRef<B>,
{
    /// Get a reference from either the `MaybeBorrowed<'a, B>` or the buffer containing the owned data.
    pub fn as_ref<'o>(&'o self, owned_buffer: &'o B::Owned) -> &'o B {
        match *self {
            MaybeBorrowed::Borrowed(b) => b,
            MaybeBorrowed::Owned => owned_buffer.as_ref(),
        }
    }
}
|
||||||
|
|
|
||||||
|
|
@ -2,14 +2,14 @@ use std::cmp::min;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
use std::mem::size_of;
|
use std::mem::{size_of, take};
|
||||||
use std::ops::{BitOrAssign, BitXor, Index, Range, RangeFrom};
|
use std::ops::{BitOrAssign, BitXor, Index, Range, RangeFrom};
|
||||||
|
|
||||||
use num_traits::{Float, PrimInt, WrappingSub};
|
use num_traits::{Float, PrimInt, WrappingSub};
|
||||||
|
|
||||||
use crate::endianness::Endianness;
|
use crate::endianness::Endianness;
|
||||||
use crate::num_traits::{IsSigned, UncheckedPrimitiveFloat, UncheckedPrimitiveInt};
|
use crate::num_traits::{IsSigned, UncheckedPrimitiveFloat, UncheckedPrimitiveInt};
|
||||||
use crate::{BitError, Result};
|
use crate::{BitError, MaybeBorrowed, Result};
|
||||||
use std::borrow::{Borrow, Cow};
|
use std::borrow::{Borrow, Cow};
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
|
|
@ -471,6 +471,8 @@ where
|
||||||
|
|
||||||
/// Read a series of bytes from the buffer
|
/// Read a series of bytes from the buffer
|
||||||
///
|
///
|
||||||
|
/// If the `position` is a multiple of 8, the output will be borrowed, otherwise it will be copied into a new Vec.
|
||||||
|
///
|
||||||
/// # Errors
|
/// # Errors
|
||||||
///
|
///
|
||||||
/// - [`ReadError::NotEnoughData`]: not enough bits available in the buffer
|
/// - [`ReadError::NotEnoughData`]: not enough bits available in the buffer
|
||||||
|
|
@ -499,6 +501,54 @@ where
|
||||||
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
|
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn read_bytes(&self, position: usize, byte_count: usize) -> Result<Cow<'a, [u8]>> {
|
pub fn read_bytes(&self, position: usize, byte_count: usize) -> Result<Cow<'a, [u8]>> {
|
||||||
|
let mut output = Vec::new();
|
||||||
|
let result = self.read_bytes_into(position, byte_count, &mut output)?;
|
||||||
|
Ok(result.into_cow(output))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read a series of bytes from the buffer, using an existing buffer
|
||||||
|
///
|
||||||
|
/// If the `position` is a multiple of 8, the output will be borrowed, otherwise it will be written to the provided output buffer
|
||||||
|
///
|
||||||
|
/// See [`MaybeBorrowed`] for more information on dealing with the result.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// - [`ReadError::NotEnoughData`]: not enough bits available in the buffer
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use bitbuffer::{BitReadBuffer, LittleEndian, Result};
|
||||||
|
/// #
|
||||||
|
/// # fn main() -> Result<()> {
|
||||||
|
/// # let bytes = vec![
|
||||||
|
/// # 0b1011_0101, 0b0110_1010, 0b1010_1100, 0b1001_1001,
|
||||||
|
/// # 0b1001_1001, 0b1001_1001, 0b1001_1001, 0b1110_0111
|
||||||
|
/// # ];
|
||||||
|
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
|
||||||
|
/// let mut output = Vec::new();
|
||||||
|
/// let result = buffer.read_bytes_into(5, 3, &mut output)?;
|
||||||
|
/// assert_eq!(result.as_ref(&output), &[0b0_1010_101, 0b0_1100_011, 0b1_1001_101]);
|
||||||
|
/// output.clear();
|
||||||
|
/// let result = buffer.read_bytes_into(0, 8, &mut output)?;
|
||||||
|
/// assert_eq!(result.as_ref(&output), &[
|
||||||
|
/// 0b1011_0101, 0b0110_1010, 0b1010_1100, 0b1001_1001,
|
||||||
|
/// 0b1001_1001, 0b1001_1001, 0b1001_1001, 0b1110_0111
|
||||||
|
/// ]);
|
||||||
|
/// #
|
||||||
|
/// # Ok(())
|
||||||
|
/// # }
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
|
||||||
|
#[inline]
|
||||||
|
pub fn read_bytes_into(
|
||||||
|
&self,
|
||||||
|
position: usize,
|
||||||
|
byte_count: usize,
|
||||||
|
output: &mut Vec<u8>,
|
||||||
|
) -> Result<MaybeBorrowed<'a, [u8]>> {
|
||||||
if position + byte_count * 8 > self.bit_len() {
|
if position + byte_count * 8 > self.bit_len() {
|
||||||
if position > self.bit_len() {
|
if position > self.bit_len() {
|
||||||
return Err(BitError::IndexOutOfBounds {
|
return Err(BitError::IndexOutOfBounds {
|
||||||
|
|
@ -513,20 +563,34 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(unsafe { self.read_bytes_unchecked(position, byte_count) })
|
Ok(unsafe { self.read_bytes_unchecked_into(position, byte_count, output) })
|
||||||
}
|
}
|
||||||
|
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
#[inline]
|
#[inline]
|
||||||
pub unsafe fn read_bytes_unchecked(&self, position: usize, byte_count: usize) -> Cow<'a, [u8]> {
|
pub unsafe fn read_bytes_unchecked(&self, position: usize, byte_count: usize) -> Cow<'a, [u8]> {
|
||||||
|
let mut output = Vec::new();
|
||||||
|
let result = self.read_bytes_unchecked_into(position, byte_count, &mut output);
|
||||||
|
result.into_cow(output)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[doc(hidden)]
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn read_bytes_unchecked_into(
|
||||||
|
&self,
|
||||||
|
position: usize,
|
||||||
|
byte_count: usize,
|
||||||
|
output: &mut Vec<u8>,
|
||||||
|
) -> MaybeBorrowed<'a, [u8]> {
|
||||||
let shift = position & 7;
|
let shift = position & 7;
|
||||||
|
|
||||||
if shift == 0 {
|
if shift == 0 {
|
||||||
let byte_pos = position / 8;
|
let byte_pos = position / 8;
|
||||||
return Cow::Borrowed(&self.slice[byte_pos..byte_pos + byte_count]);
|
return MaybeBorrowed::Borrowed(&self.slice[byte_pos..byte_pos + byte_count]);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut data = Vec::with_capacity(byte_count);
|
output.clear();
|
||||||
|
output.reserve(byte_count);
|
||||||
let mut byte_left = byte_count;
|
let mut byte_left = byte_count;
|
||||||
let mut read_pos = position / 8;
|
let mut read_pos = position / 8;
|
||||||
|
|
||||||
|
|
@ -536,7 +600,7 @@ where
|
||||||
let bytes = raw.to_le_bytes();
|
let bytes = raw.to_le_bytes();
|
||||||
let read_bytes = USIZE_SIZE - 1;
|
let read_bytes = USIZE_SIZE - 1;
|
||||||
let usable_bytes = &bytes[0..read_bytes];
|
let usable_bytes = &bytes[0..read_bytes];
|
||||||
data.extend_from_slice(usable_bytes);
|
output.extend_from_slice(usable_bytes);
|
||||||
|
|
||||||
read_pos += read_bytes;
|
read_pos += read_bytes;
|
||||||
byte_left -= read_bytes;
|
byte_left -= read_bytes;
|
||||||
|
|
@ -544,22 +608,24 @@ where
|
||||||
|
|
||||||
let bytes = self.read_shifted_usize(read_pos, shift, true).to_le_bytes();
|
let bytes = self.read_shifted_usize(read_pos, shift, true).to_le_bytes();
|
||||||
let usable_bytes = &bytes[0..byte_left];
|
let usable_bytes = &bytes[0..byte_left];
|
||||||
data.extend_from_slice(usable_bytes);
|
output.extend_from_slice(usable_bytes);
|
||||||
} else {
|
} else {
|
||||||
let mut pos = position;
|
let mut pos = position;
|
||||||
while byte_left > 0 {
|
while byte_left > 0 {
|
||||||
data.push(self.read_int_unchecked::<u8>(pos, 8, true));
|
output.push(self.read_int_unchecked::<u8>(pos, 8, true));
|
||||||
byte_left -= 1;
|
byte_left -= 1;
|
||||||
pos += 8;
|
pos += 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Cow::Owned(data)
|
MaybeBorrowed::Owned
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Read a series of bytes from the buffer as string
|
/// Read a series of bytes from the buffer as string.
|
||||||
///
|
///
|
||||||
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string
|
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string.
|
||||||
|
///
|
||||||
|
/// If the `position` is a multiple of 8, the output will be borrowed, otherwise it will be copied into a new String.
|
||||||
///
|
///
|
||||||
/// # Errors
|
/// # Errors
|
||||||
///
|
///
|
||||||
|
|
@ -580,7 +646,7 @@ where
|
||||||
/// # ];
|
/// # ];
|
||||||
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
|
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
|
||||||
/// // Fixed length string
|
/// // Fixed length string
|
||||||
/// assert_eq!(buffer.read_string(0, Some(13))?, "Hello world".to_owned());
|
/// assert_eq!(buffer.read_string(0, Some(11))?, "Hello world".to_owned());
|
||||||
/// // fixed length with null padding
|
/// // fixed length with null padding
|
||||||
/// assert_eq!(buffer.read_string(0, Some(16))?, "Hello world".to_owned());
|
/// assert_eq!(buffer.read_string(0, Some(16))?, "Hello world".to_owned());
|
||||||
/// // null terminated
|
/// // null terminated
|
||||||
|
|
@ -594,34 +660,92 @@ where
|
||||||
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
|
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn read_string(&self, position: usize, byte_len: Option<usize>) -> Result<Cow<'a, str>> {
|
pub fn read_string(&self, position: usize, byte_len: Option<usize>) -> Result<Cow<'a, str>> {
|
||||||
|
let mut output = String::new();
|
||||||
|
let result = self.read_string_into(position, byte_len, &mut output)?;
|
||||||
|
Ok(result.into_cow(output))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read a series of bytes from the buffer as string.
|
||||||
|
///
|
||||||
|
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string.
|
||||||
|
///
|
||||||
|
/// If the `position` is a multiple of 8, the output will be borrowed, otherwise it will be copied into the provided String.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// - [`ReadError::NotEnoughData`]: not enough bits available in the buffer
|
||||||
|
/// - [`ReadError::Utf8Error`]: the read bytes are not valid utf8
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use bitbuffer::{BitReadBuffer, BitReadStream, LittleEndian, Result};
|
||||||
|
/// #
|
||||||
|
/// # fn main() -> Result<()> {
|
||||||
|
/// # let bytes = vec![
|
||||||
|
/// # 0x48, 0x65, 0x6c, 0x6c,
|
||||||
|
/// # 0x6f, 0x20, 0x77, 0x6f,
|
||||||
|
/// # 0x72, 0x6c, 0x64, 0,
|
||||||
|
/// # 0, 0, 0, 0
|
||||||
|
/// # ];
|
||||||
|
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
|
||||||
|
/// let mut output = String::new();
|
||||||
|
/// // Fixed length string
|
||||||
|
/// let result = buffer.read_string_into(0, Some(11), &mut output)?;
|
||||||
|
/// assert_eq!(result.as_ref(&output), "Hello world");
|
||||||
|
/// // fixed length with null padding
|
||||||
|
/// let result = buffer.read_string_into(0, Some(16), &mut output)?;
|
||||||
|
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
|
||||||
|
/// // null terminated
|
||||||
|
/// let result = buffer.read_string_into(0, None, &mut output)?;
|
||||||
|
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
|
||||||
|
/// #
|
||||||
|
/// # Ok(())
|
||||||
|
/// # }
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
|
||||||
|
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
|
||||||
|
#[inline]
|
||||||
|
pub fn read_string_into(
|
||||||
|
&self,
|
||||||
|
position: usize,
|
||||||
|
byte_len: Option<usize>,
|
||||||
|
output: &mut String,
|
||||||
|
) -> Result<MaybeBorrowed<'a, str>> {
|
||||||
|
let mut taken_output = take(output).into_bytes();
|
||||||
match byte_len {
|
match byte_len {
|
||||||
Some(byte_len) => {
|
Some(byte_len) => {
|
||||||
let bytes = self.read_bytes(position, byte_len)?;
|
let bytes = self.read_bytes_into(position, byte_len, &mut taken_output)?;
|
||||||
|
|
||||||
let string = match bytes {
|
let result = match bytes {
|
||||||
Cow::Owned(bytes) => Cow::Owned(
|
MaybeBorrowed::Owned => {
|
||||||
String::from_utf8(bytes)?
|
*output = String::from_utf8(taken_output)?
|
||||||
.trim_end_matches(char::from(0))
|
.trim_end_matches(char::from(0))
|
||||||
.to_string(),
|
.to_string();
|
||||||
),
|
MaybeBorrowed::Owned
|
||||||
Cow::Borrowed(bytes) => Cow::Borrowed(
|
}
|
||||||
|
MaybeBorrowed::Borrowed(bytes) => MaybeBorrowed::Borrowed(
|
||||||
std::str::from_utf8(bytes)
|
std::str::from_utf8(bytes)
|
||||||
.map_err(|err| BitError::Utf8Error(err, bytes.len()))?
|
.map_err(|err| BitError::Utf8Error(err, bytes.len()))?
|
||||||
.trim_end_matches(char::from(0)),
|
.trim_end_matches(char::from(0)),
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
Ok(string)
|
Ok(result)
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
let bytes = self.read_string_bytes(position)?;
|
let bytes = self.read_string_bytes(position, &mut taken_output)?;
|
||||||
let string = match bytes {
|
let result = match bytes {
|
||||||
Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes)?),
|
MaybeBorrowed::Owned => {
|
||||||
Cow::Borrowed(bytes) => Cow::Borrowed(
|
*output = String::from_utf8(taken_output)?;
|
||||||
|
MaybeBorrowed::Owned
|
||||||
|
}
|
||||||
|
MaybeBorrowed::Borrowed(bytes) => MaybeBorrowed::Borrowed(
|
||||||
std::str::from_utf8(bytes)
|
std::str::from_utf8(bytes)
|
||||||
.map_err(|err| BitError::Utf8Error(err, bytes.len()))?,
|
.map_err(|err| BitError::Utf8Error(err, bytes.len()))?,
|
||||||
),
|
),
|
||||||
};
|
};
|
||||||
Ok(string)
|
Ok(result)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -634,15 +758,20 @@ where
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn read_string_bytes(&self, position: usize) -> Result<Cow<'a, [u8]>> {
|
fn read_string_bytes<'output>(
|
||||||
|
&self,
|
||||||
|
position: usize,
|
||||||
|
buffer: &'output mut Vec<u8>,
|
||||||
|
) -> Result<MaybeBorrowed<'a, [u8]>> {
|
||||||
let shift = position & 7;
|
let shift = position & 7;
|
||||||
if shift == 0 {
|
if shift == 0 {
|
||||||
let byte_index = position / 8;
|
let byte_index = position / 8;
|
||||||
Ok(Cow::Borrowed(
|
Ok(MaybeBorrowed::Borrowed(
|
||||||
&self.slice[byte_index..self.find_null_byte(byte_index)],
|
&self.slice[byte_index..self.find_null_byte(byte_index)],
|
||||||
))
|
))
|
||||||
} else {
|
} else {
|
||||||
let mut acc = Vec::with_capacity(32);
|
buffer.clear();
|
||||||
|
buffer.reserve(64);
|
||||||
if E::is_le() {
|
if E::is_le() {
|
||||||
let mut byte_index = position / 8;
|
let mut byte_index = position / 8;
|
||||||
loop {
|
loop {
|
||||||
|
|
@ -661,13 +790,13 @@ where
|
||||||
if has_null {
|
if has_null {
|
||||||
for i in 0..USIZE_SIZE - 1 {
|
for i in 0..USIZE_SIZE - 1 {
|
||||||
if usable_bytes[i] == 0 {
|
if usable_bytes[i] == 0 {
|
||||||
acc.extend_from_slice(&usable_bytes[0..i]);
|
buffer.extend_from_slice(&usable_bytes[0..i]);
|
||||||
return Ok(Cow::Owned(acc));
|
return Ok(MaybeBorrowed::Owned);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
acc.extend_from_slice(&usable_bytes[0..USIZE_SIZE - 1]);
|
buffer.extend_from_slice(&usable_bytes[0..USIZE_SIZE - 1]);
|
||||||
|
|
||||||
byte_index += USIZE_SIZE - 1;
|
byte_index += USIZE_SIZE - 1;
|
||||||
}
|
}
|
||||||
|
|
@ -677,9 +806,9 @@ where
|
||||||
let byte = self.read_int::<u8>(pos, 8)?;
|
let byte = self.read_int::<u8>(pos, 8)?;
|
||||||
pos += 8;
|
pos += 8;
|
||||||
if byte == 0 {
|
if byte == 0 {
|
||||||
return Ok(Cow::Owned(acc));
|
return Ok(MaybeBorrowed::Owned);
|
||||||
} else {
|
} else {
|
||||||
acc.push(byte);
|
buffer.push(byte);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,8 @@ use num_traits::{Float, PrimInt, WrappingSub};
|
||||||
use crate::endianness::Endianness;
|
use crate::endianness::Endianness;
|
||||||
use crate::num_traits::{IsSigned, UncheckedPrimitiveFloat, UncheckedPrimitiveInt};
|
use crate::num_traits::{IsSigned, UncheckedPrimitiveFloat, UncheckedPrimitiveInt};
|
||||||
use crate::readbuffer::Data;
|
use crate::readbuffer::Data;
|
||||||
use crate::BitReadBuffer;
|
|
||||||
use crate::{BitError, BitRead, BitReadSized, Result};
|
use crate::{BitError, BitRead, BitReadSized, Result};
|
||||||
|
use crate::{BitReadBuffer, MaybeBorrowed};
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::cmp::min;
|
use std::cmp::min;
|
||||||
|
|
||||||
|
|
@ -214,6 +214,8 @@ where
|
||||||
|
|
||||||
/// Read a series of bytes from the stream
|
/// Read a series of bytes from the stream
|
||||||
///
|
///
|
||||||
|
/// If the current read position is on a byte boundary, the output will be borrowed, otherwise it will be copied into a new Vec
|
||||||
|
///
|
||||||
/// # Errors
|
/// # Errors
|
||||||
///
|
///
|
||||||
/// - [`ReadError::NotEnoughData`]: not enough bits available in the stream
|
/// - [`ReadError::NotEnoughData`]: not enough bits available in the stream
|
||||||
|
|
@ -249,6 +251,51 @@ where
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Read a series of bytes from the stream
|
||||||
|
///
|
||||||
|
/// If the current read position is on a byte boundary, the output will be borrowed, otherwise it will be written to the provided output buffer
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// - [`ReadError::NotEnoughData`]: not enough bits available in the stream
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use bitbuffer::{BitReadBuffer, BitReadStream, LittleEndian, Result};
|
||||||
|
/// #
|
||||||
|
/// # fn main() -> Result<()> {
|
||||||
|
/// # use std::borrow::Borrow;
|
||||||
|
/// let bytes = vec![
|
||||||
|
/// # 0b1011_0101, 0b0110_1010, 0b1010_1100, 0b1001_1001,
|
||||||
|
/// # 0b1001_1001, 0b1001_1001, 0b1001_1001, 0b1110_0111
|
||||||
|
/// # ];
|
||||||
|
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
|
||||||
|
/// # let mut stream = BitReadStream::new(buffer);
|
||||||
|
/// let mut output = Vec::new();
|
||||||
|
/// let result = stream.read_bytes_into(3, &mut output)?;
|
||||||
|
/// assert_eq!(result.as_ref(&output), &[0b1011_0101, 0b0110_1010, 0b1010_1100]);
|
||||||
|
/// assert_eq!(stream.pos(), 24);
|
||||||
|
/// #
|
||||||
|
/// # Ok(())
|
||||||
|
/// # }
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
|
||||||
|
#[inline]
|
||||||
|
pub fn read_bytes_into(
|
||||||
|
&mut self,
|
||||||
|
byte_count: usize,
|
||||||
|
output: &mut Vec<u8>,
|
||||||
|
) -> Result<MaybeBorrowed<'a, [u8]>> {
|
||||||
|
let count = byte_count * 8;
|
||||||
|
let result = self.buffer.read_bytes_into(self.pos, byte_count, output);
|
||||||
|
if result.is_ok() {
|
||||||
|
self.pos += count;
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
#[inline]
|
#[inline]
|
||||||
pub unsafe fn read_bytes_unchecked(&mut self, byte_count: usize) -> Cow<'a, [u8]> {
|
pub unsafe fn read_bytes_unchecked(&mut self, byte_count: usize) -> Cow<'a, [u8]> {
|
||||||
|
|
@ -258,10 +305,27 @@ where
|
||||||
result
|
result
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[doc(hidden)]
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn read_bytes_unchecked_into(
|
||||||
|
&mut self,
|
||||||
|
byte_count: usize,
|
||||||
|
output: &mut Vec<u8>,
|
||||||
|
) -> MaybeBorrowed<'a, [u8]> {
|
||||||
|
let count = byte_count * 8;
|
||||||
|
let result = self
|
||||||
|
.buffer
|
||||||
|
.read_bytes_unchecked_into(self.pos, byte_count, output);
|
||||||
|
self.pos += count;
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
/// Read a series of bytes from the stream as utf8 string
|
/// Read a series of bytes from the stream as utf8 string
|
||||||
///
|
///
|
||||||
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string
|
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string
|
||||||
///
|
///
|
||||||
|
/// If the current read position is on a byte boundary, the output will be borrowed, otherwise it will be copied into a new String
|
||||||
|
///
|
||||||
/// # Errors
|
/// # Errors
|
||||||
///
|
///
|
||||||
/// - [`ReadError::NotEnoughData`]: not enough bits available in the stream
|
/// - [`ReadError::NotEnoughData`]: not enough bits available in the stream
|
||||||
|
|
@ -302,11 +366,70 @@ where
|
||||||
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
|
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn read_string(&mut self, byte_len: Option<usize>) -> Result<Cow<'a, str>> {
|
pub fn read_string(&mut self, byte_len: Option<usize>) -> Result<Cow<'a, str>> {
|
||||||
|
let mut output = String::new();
|
||||||
|
let result = self.read_string_into(byte_len, &mut output)?;
|
||||||
|
Ok(result.into_cow(output))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Read a series of bytes from the stream as utf8 string
|
||||||
|
///
|
||||||
|
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string
|
||||||
|
///
|
||||||
|
/// If the current read position is on a byte boundary, the output will be borrowed, otherwise it will be written to the provided output String
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// - [`ReadError::NotEnoughData`]: not enough bits available in the stream
|
||||||
|
/// - [`ReadError::Utf8Error`]: the read bytes are not valid utf8
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use bitbuffer::{BitReadBuffer, BitReadStream, LittleEndian, Result};
|
||||||
|
/// #
|
||||||
|
/// # fn main() -> Result<()> {
|
||||||
|
/// # let bytes = vec![
|
||||||
|
/// # 0x48, 0x65, 0x6c, 0x6c,
|
||||||
|
/// # 0x6f, 0x20, 0x77, 0x6f,
|
||||||
|
/// # 0x72, 0x6c, 0x64, 0,
|
||||||
|
/// # 0, 0, 0, 0
|
||||||
|
/// # ];
|
||||||
|
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
|
||||||
|
/// # let mut stream = BitReadStream::new(buffer);
|
||||||
|
/// // Fixed length string
|
||||||
|
/// stream.set_pos(0);
|
||||||
|
/// let mut output = String::new();
|
||||||
|
/// let result = stream.read_string_into(Some(11), &mut output)?;
|
||||||
|
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
|
||||||
|
/// assert_eq!(11 * 8, stream.pos());
|
||||||
|
/// // fixed length with null padding
|
||||||
|
/// stream.set_pos(0);
|
||||||
|
/// let result = stream.read_string_into(Some(16), &mut output)?;
|
||||||
|
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
|
||||||
|
/// assert_eq!(16 * 8, stream.pos());
|
||||||
|
/// // null terminated
|
||||||
|
/// stream.set_pos(0);
|
||||||
|
/// let result = stream.read_string_into(None, &mut output)?;
|
||||||
|
/// assert_eq!(result.as_ref(&output), "Hello world".to_owned());
|
||||||
|
/// assert_eq!(12 * 8, stream.pos()); // 1 more for the terminating null byte
|
||||||
|
/// #
|
||||||
|
/// # Ok(())
|
||||||
|
/// # }
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
|
||||||
|
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
|
||||||
|
#[inline]
|
||||||
|
pub fn read_string_into(
|
||||||
|
&mut self,
|
||||||
|
byte_len: Option<usize>,
|
||||||
|
output: &mut String,
|
||||||
|
) -> Result<MaybeBorrowed<'a, str>> {
|
||||||
let max_length = self.bits_left() / 8;
|
let max_length = self.bits_left() / 8;
|
||||||
|
|
||||||
let result = self
|
let result = self
|
||||||
.buffer
|
.buffer
|
||||||
.read_string(self.pos, byte_len)
|
.read_string_into(self.pos, byte_len, output)
|
||||||
.map_err(|mut err| {
|
.map_err(|mut err| {
|
||||||
// still advance the stream on malformed utf8
|
// still advance the stream on malformed utf8
|
||||||
if let BitError::Utf8Error(_, len) = &mut err {
|
if let BitError::Utf8Error(_, len) = &mut err {
|
||||||
|
|
@ -319,25 +442,32 @@ where
|
||||||
}
|
}
|
||||||
err
|
err
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
|
let str = result.as_ref(output);
|
||||||
let read = match byte_len {
|
let read = match byte_len {
|
||||||
Some(len) => len * 8,
|
Some(len) => len * 8,
|
||||||
None => (result.len() + 1) * 8,
|
None => (str.len() + 1) * 8,
|
||||||
};
|
};
|
||||||
|
|
||||||
// due to how sub buffer/streams work, the result string can be longer than the current stream
|
// due to how sub buffer/streams work, the result string can be longer than the current stream
|
||||||
// (but not the top level buffer)
|
// (but not the top level buffer)
|
||||||
// thus we trim the resulting string to make sure it fits in the source stream
|
// thus we trim the resulting string to make sure it fits in the source stream
|
||||||
if read > self.bits_left() {
|
if read > self.bits_left() {
|
||||||
// find the maximum well-formed utf8 string that fits in max_len
|
let mut target_length = str.len().min(max_length);
|
||||||
let mut acc = String::with_capacity(max_length);
|
// find the closest char boundary
|
||||||
for c in result.chars() {
|
while !str.is_char_boundary(target_length) {
|
||||||
if acc.len() + c.len_utf8() > max_length {
|
target_length -= 1;
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
acc.push(c);
|
|
||||||
|
let trimmed = match result {
|
||||||
|
MaybeBorrowed::Owned => {
|
||||||
|
output.truncate(target_length);
|
||||||
|
MaybeBorrowed::Owned
|
||||||
}
|
}
|
||||||
self.pos += acc.len() * 8;
|
MaybeBorrowed::Borrowed(s) => MaybeBorrowed::Borrowed(&s[0..target_length]),
|
||||||
return Ok(Cow::Owned(acc));
|
};
|
||||||
|
self.pos += target_length * 8;
|
||||||
|
return Ok(trimmed);
|
||||||
}
|
}
|
||||||
self.pos += read;
|
self.pos += read;
|
||||||
Ok(result)
|
Ok(result)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue