1
0
Fork 0
mirror of https://codeberg.org/icewind/bitbuffer.git synced 2026-06-03 16:44:06 +02:00
This commit is contained in:
Robin Appelman 2020-12-06 17:35:03 +01:00
commit 51faeababf
5 changed files with 125 additions and 51 deletions

View file

@ -23,7 +23,7 @@ fn read_perf<E: Endianness>(buffer: &BitReadBuffer<E>) -> u16 {
#[bench]
fn perf_le(b: &mut Bencher) {
let data = vec![1u8; 1024 * 1024 * 10];
let buffer = BitReadBuffer::new(data, LittleEndian);
let buffer = BitReadBuffer::new(&data, LittleEndian);
b.iter(|| {
let data = read_perf(&buffer);
assert_eq!(data, 0);
@ -34,7 +34,7 @@ fn perf_le(b: &mut Bencher) {
#[bench]
fn perf_be(b: &mut Bencher) {
let data = vec![1u8; 1024 * 1024 * 10];
let buffer = BitReadBuffer::new(data, BigEndian);
let buffer = BitReadBuffer::new(&data, BigEndian);
b.iter(|| {
let data = read_perf(&buffer);
assert_eq!(data, 0);
@ -45,7 +45,7 @@ fn perf_be(b: &mut Bencher) {
#[bench]
fn perf_f32_be(b: &mut Bencher) {
let data = vec![1u8; 1024 * 1024 * 10];
let buffer = BitReadBuffer::new(data, BigEndian);
let buffer = BitReadBuffer::new(&data, BigEndian);
b.iter(|| {
let mut pos = 0;
let len = buffer.bit_len();
@ -66,7 +66,7 @@ fn perf_f32_be(b: &mut Bencher) {
#[bench]
fn perf_f32_le(b: &mut Bencher) {
let data = vec![1u8; 1024 * 1024 * 10];
let buffer = BitReadBuffer::new(data, LittleEndian);
let buffer = BitReadBuffer::new(&data, LittleEndian);
b.iter(|| {
let mut pos = 0;
let len = buffer.bit_len();
@ -89,7 +89,7 @@ const F64_RESULT: f64 = 0.000000000000000000000000000000000000000000000000000000
#[bench]
fn perf_f64(b: &mut Bencher) {
let data = vec![1u8; 1024 * 1024 * 10];
let buffer = BitReadBuffer::new(data, BigEndian);
let buffer = BitReadBuffer::new(&data, BigEndian);
b.iter(|| {
let mut pos = 0;
let len = buffer.bit_len();
@ -110,7 +110,7 @@ fn perf_f64(b: &mut Bencher) {
#[bench]
fn perf_bool(b: &mut Bencher) {
let data = vec![1u8; 1024 * 1024 * 1];
let buffer = BitReadBuffer::new(data, BigEndian);
let buffer = BitReadBuffer::new(&data, BigEndian);
b.iter(|| {
let mut pos = 0;
let len = buffer.bit_len();
@ -151,7 +151,8 @@ fn get_string_buffer() -> Vec<u8> {
#[bench]
fn perf_string_be(b: &mut Bencher) {
let buffer = BitReadBuffer::new(get_string_buffer(), BigEndian);
let data = get_string_buffer();
let buffer = BitReadBuffer::new(&data, BigEndian);
b.iter(|| {
let mut pos = 0;
@ -169,7 +170,8 @@ fn perf_string_be(b: &mut Bencher) {
#[bench]
fn perf_string_le(b: &mut Bencher) {
let buffer = BitReadBuffer::new(get_string_buffer(), LittleEndian);
let data = get_string_buffer();
let buffer = BitReadBuffer::new(&data, LittleEndian);
b.iter(|| {
let mut pos = 0;
@ -187,7 +189,8 @@ fn perf_string_le(b: &mut Bencher) {
#[bench]
fn perf_bytes_be(b: &mut Bencher) {
let buffer = BitReadBuffer::new(get_string_buffer(), BigEndian);
let data = get_string_buffer();
let buffer = BitReadBuffer::new(&data, BigEndian);
b.iter(|| {
let mut pos = 0;
@ -205,7 +208,8 @@ fn perf_bytes_be(b: &mut Bencher) {
#[bench]
fn perf_bytes_le(b: &mut Bencher) {
let buffer = BitReadBuffer::new(get_string_buffer(), LittleEndian);
let data = get_string_buffer();
let buffer = BitReadBuffer::new(&data, LittleEndian);
b.iter(|| {
let mut pos = 0;
@ -223,7 +227,8 @@ fn perf_bytes_le(b: &mut Bencher) {
#[bench]
fn perf_bytes_be_unaligned(b: &mut Bencher) {
let buffer = BitReadBuffer::new(get_string_buffer(), BigEndian);
let data = get_string_buffer();
let buffer = BitReadBuffer::new(&data, BigEndian);
b.iter(|| {
let mut pos = 3;
@ -241,7 +246,8 @@ fn perf_bytes_be_unaligned(b: &mut Bencher) {
#[bench]
fn perf_bytes_le_unaligned(b: &mut Bencher) {
let buffer = BitReadBuffer::new(get_string_buffer(), LittleEndian);
let data = get_string_buffer();
let buffer = BitReadBuffer::new(&data, LittleEndian);
b.iter(|| {
let mut pos = 3;
@ -268,7 +274,8 @@ struct BasicStruct {
#[bench]
fn perf_struct(b: &mut Bencher) {
let buffer = BitReadBuffer::new(get_string_buffer(), LittleEndian);
let data = get_string_buffer();
let buffer = BitReadBuffer::new(&data, LittleEndian);
b.iter(|| {
let mut stream: BitReadStream<LittleEndian> = buffer.clone().into();

View file

@ -55,13 +55,14 @@
#![warn(missing_docs)]
use err_derive::Error;
pub use std::string::FromUtf8Error;
pub use bitbuffer_derive::{BitRead, BitReadSized};
pub use endianness::*;
pub use read::{BitRead, BitReadSized, LazyBitRead, LazyBitReadSized};
pub use readbuffer::BitReadBuffer;
pub use readstream::BitReadStream;
use std::str::Utf8Error;
use std::string::FromUtf8Error;
pub use writestream::BitWriteStream;
mod endianness;
@ -124,7 +125,7 @@ pub enum BitError {
},
/// The read slice of bytes is not valid UTF-8
#[error(display = "The read slice of bytes are not valid utf8: {}", _0)]
Utf8Error(#[error(source)] FromUtf8Error),
Utf8Error(#[error(source)] Utf8Error),
/// The string that was requested to be written does not fit in the specified fixed length
#[error(
display = "The string that was requested to be written does not fit in the specified fixed length, string is {} bytes long, while a size of {} has been specified",
@ -139,6 +140,12 @@ pub enum BitError {
},
}
impl From<FromUtf8Error> for BitError {
fn from(err: FromUtf8Error) -> Self {
BitError::from(err.utf8_error())
}
}
/// Either the read bits in the requested format or a [`BitError`](enum.BitError.html)
pub type Result<T> = std::result::Result<T, BitError>;

View file

@ -1,5 +1,6 @@
use crate::endianness::{BigEndian, LittleEndian};
use crate::{BitReadStream, Endianness, Result};
use std::borrow::Cow;
use std::cell::RefCell;
use std::cmp::min;
use std::collections::HashMap;
@ -261,6 +262,13 @@ impl<E: Endianness> BitRead<'_, E> for bool {
impl<E: Endianness> BitRead<'_, E> for String {
#[inline]
fn read(stream: &mut BitReadStream<E>) -> Result<String> {
Ok(stream.read_string(None)?.into_owned())
}
}
/// Read a string from the stream without an explicit byte length, keeping
/// whatever `Cow` variant the stream produced.
impl<'a, E: Endianness> BitRead<'a, E> for Cow<'a, str> {
    /// Forwards to `read_string(None)`; the returned `Cow` is passed through
    /// as-is, so no extra copy is made here.
    #[inline]
    fn read(stream: &mut BitReadStream<'a, E>) -> Result<Cow<'a, str>> {
        let text = stream.read_string(None)?;
        Ok(text)
    }
}
@ -477,6 +485,18 @@ impl_read_int_sized!(i128);
impl<E: Endianness> BitReadSized<'_, E> for String {
#[inline]
fn read(stream: &mut BitReadStream<E>, size: usize) -> Result<String> {
Ok(stream.read_string(Some(size))?.into_owned())
}
#[inline]
fn bit_size_sized(size: usize) -> Option<usize> {
Some(8 * size)
}
}
impl<'a, E: Endianness> BitReadSized<'a, E> for Cow<'a, str> {
#[inline]
fn read(stream: &mut BitReadStream<'a, E>, size: usize) -> Result<Cow<'a, str>> {
stream.read_string(Some(size))
}
@ -486,6 +506,18 @@ impl<E: Endianness> BitReadSized<'_, E> for String {
}
}
/// Read `size` bytes from the stream, keeping whatever `Cow` variant the
/// stream produced.
impl<'a, E: Endianness> BitReadSized<'a, E> for Cow<'a, [u8]> {
    /// Forwards to `read_bytes(size)`; the returned `Cow` is passed through
    /// as-is, so no extra copy is made here.
    #[inline]
    fn read(stream: &mut BitReadStream<'a, E>, size: usize) -> Result<Cow<'a, [u8]>> {
        let bytes = stream.read_bytes(size)?;
        Ok(bytes)
    }

    /// Size in bits of `size` bytes (8 bits per byte).
    #[inline]
    fn bit_size_sized(size: usize) -> Option<usize> {
        let bits = 8 * size;
        Some(bits)
    }
}
/// Read a boolean, if true, read `T`, else return `None`
impl<'a, E: Endianness, T: BitRead<'a, E>> BitRead<'a, E> for Option<T> {
fn read(stream: &mut BitReadStream<'a, E>) -> Result<Self> {
@ -522,10 +554,24 @@ impl<'a, E: Endianness> BitReadSized<'a, E> for BitReadStream<'a, E> {
/// Read `T` `size` times and return as `Vec<T>`
impl<'a, E: Endianness, T: BitRead<'a, E>> BitReadSized<'a, E> for Vec<T> {
fn read(stream: &mut BitReadStream<'a, E>, size: usize) -> Result<Self> {
// todo check size and use unchecked
let mut vec = Vec::with_capacity(min(size, 128));
for _ in 0..size {
vec.push(stream.read()?)
match T::bit_size() {
Some(bit_size) => {
if stream.check_read(bit_size * size)? {
for _ in 0..size {
vec.push(unsafe { stream.read_unchecked(true) }?)
}
} else {
for _ in 0..size {
vec.push(unsafe { stream.read_unchecked(false) }?)
}
}
}
_ => {
for _ in 0..size {
vec.push(stream.read()?)
}
}
}
Ok(vec)
}

View file

@ -10,7 +10,7 @@ use num_traits::{Float, PrimInt};
use crate::endianness::Endianness;
use crate::num_traits::{IsSigned, UncheckedPrimitiveFloat, UncheckedPrimitiveInt};
use crate::{BitError, Result};
use std::borrow::Borrow;
use std::borrow::{Borrow, Cow};
use std::convert::TryInto;
use std::rc::Rc;
@ -333,7 +333,7 @@ where
});
}
let end = if position + count + USIZE_BIT_SIZE > self.bit_len() {
if position + count + USIZE_BIT_SIZE > self.bit_len() {
if position + count > self.bit_len() {
return if position > self.bit_len() {
Err(BitError::IndexOutOfBounds {
@ -347,12 +347,10 @@ where
})
};
}
true
Ok(unsafe { self.read_int_unchecked(position, count, true) })
} else {
false
};
Ok(unsafe { self.read_int_unchecked(position, count, end) })
Ok(unsafe { self.read_int_unchecked(position, count, false) })
}
}
#[doc(hidden)]
@ -451,8 +449,8 @@ where
/// # 0b1001_1001, 0b1001_1001, 0b1001_1001, 0b1110_0111
/// # ];
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
/// assert_eq!(buffer.read_bytes(5, 3)?, &[0b0_1010_101, 0b0_1100_011, 0b1_1001_101]);
/// assert_eq!(buffer.read_bytes(0, 8)?, &[
/// assert_eq!(buffer.read_bytes(5, 3)?.to_vec(), &[0b0_1010_101, 0b0_1100_011, 0b1_1001_101]);
/// assert_eq!(buffer.read_bytes(0, 8)?.to_vec(), &[
/// 0b1011_0101, 0b0110_1010, 0b1010_1100, 0b1001_1001,
/// 0b1001_1001, 0b1001_1001, 0b1001_1001, 0b1110_0111
/// ]);
@ -463,7 +461,7 @@ where
///
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
#[inline]
pub fn read_bytes(&self, position: usize, byte_count: usize) -> Result<Vec<u8>> {
pub fn read_bytes(&self, position: usize, byte_count: usize) -> Result<Cow<'a, [u8]>> {
if position + byte_count * 8 > self.bit_len() {
if position > self.bit_len() {
return Err(BitError::IndexOutOfBounds {
@ -483,12 +481,12 @@ where
#[doc(hidden)]
#[inline]
pub unsafe fn read_bytes_unchecked(&self, position: usize, byte_count: usize) -> Vec<u8> {
pub unsafe fn read_bytes_unchecked(&self, position: usize, byte_count: usize) -> Cow<'a, [u8]> {
let shift = position & 7;
if shift == 0 {
let byte_pos = position / 8;
return self.slice[byte_pos..byte_pos + byte_count].to_vec();
return Cow::Borrowed(&self.slice[byte_pos..byte_pos + byte_count]);
}
let mut data = Vec::with_capacity(byte_count);
@ -510,7 +508,7 @@ where
let usable_bytes = &bytes[0..byte_left];
data.extend_from_slice(usable_bytes);
data
Cow::Owned(data)
}
/// Read a series of bytes from the buffer as string
@ -549,16 +547,30 @@ where
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
#[inline]
pub fn read_string(&self, position: usize, byte_len: Option<usize>) -> Result<String> {
pub fn read_string(&self, position: usize, byte_len: Option<usize>) -> Result<Cow<'a, str>> {
match byte_len {
Some(byte_len) => {
let bytes = self.read_bytes(position, byte_len)?;
let raw_string = String::from_utf8(bytes)?;
Ok(raw_string.trim_end_matches(char::from(0)).to_owned())
let string = match bytes {
Cow::Owned(bytes) => Cow::Owned(
String::from_utf8(bytes)?
.trim_end_matches(char::from(0))
.to_string(),
),
Cow::Borrowed(bytes) => {
Cow::Borrowed(std::str::from_utf8(bytes)?.trim_end_matches(char::from(0)))
}
};
Ok(string)
}
None => {
let bytes = self.read_string_bytes(position)?;
String::from_utf8(bytes).map_err(BitError::from)
let string = match bytes {
Cow::Owned(bytes) => Cow::Owned(String::from_utf8(bytes)?),
Cow::Borrowed(bytes) => Cow::Borrowed(std::str::from_utf8(bytes)?),
};
Ok(string)
}
}
}
@ -571,11 +583,13 @@ where
}
#[inline]
fn read_string_bytes(&self, position: usize) -> Result<Vec<u8>> {
fn read_string_bytes(&self, position: usize) -> Result<Cow<'a, [u8]>> {
let shift = position & 7;
if shift == 0 {
let byte_index = position / 8;
Ok(self.slice[byte_index..self.find_null_byte(byte_index)].to_vec())
Ok(Cow::Borrowed(
&self.slice[byte_index..self.find_null_byte(byte_index)],
))
} else {
let mut acc = Vec::with_capacity(32);
let mut byte_index = position / 8;
@ -596,7 +610,7 @@ where
for i in 0..USIZE_SIZE - 1 {
if usable_bytes[i] == 0 {
acc.extend_from_slice(&usable_bytes[0..i]);
return Ok(acc);
return Ok(Cow::Owned(acc));
}
}
}
@ -638,7 +652,7 @@ where
T: Float + UncheckedPrimitiveFloat,
{
let type_bit_size = size_of::<T>() * 8;
let end = if position + type_bit_size + USIZE_BIT_SIZE > self.bit_len() {
if position + type_bit_size + USIZE_BIT_SIZE > self.bit_len() {
if position + type_bit_size > self.bit_len() {
if position > self.bit_len() {
return Err(BitError::IndexOutOfBounds {
@ -652,12 +666,10 @@ where
});
}
}
true
Ok(unsafe { self.read_float_unchecked(position, true) })
} else {
false
};
Ok(unsafe { self.read_float_unchecked(position, end) })
Ok(unsafe { self.read_float_unchecked(position, false) })
}
}
#[doc(hidden)]

View file

@ -7,6 +7,7 @@ use crate::endianness::Endianness;
use crate::num_traits::{IsSigned, UncheckedPrimitiveFloat, UncheckedPrimitiveInt};
use crate::BitReadBuffer;
use crate::{BitError, BitRead, BitReadSized, Result};
use std::borrow::Cow;
use std::cmp::min;
/// Stream that provides an easy way to iterate through a [`BitReadBuffer`]
@ -222,13 +223,14 @@ where
/// # use bitbuffer::{BitReadBuffer, BitReadStream, LittleEndian, Result};
/// #
/// # fn main() -> Result<()> {
/// # let bytes = vec![
/// # use std::borrow::Borrow;
/// let bytes = vec![
/// # 0b1011_0101, 0b0110_1010, 0b1010_1100, 0b1001_1001,
/// # 0b1001_1001, 0b1001_1001, 0b1001_1001, 0b1110_0111
/// # ];
/// # let buffer = BitReadBuffer::new(&bytes, LittleEndian);
/// # let mut stream = BitReadStream::new(buffer);
/// assert_eq!(stream.read_bytes(3)?, &[0b1011_0101, 0b0110_1010, 0b1010_1100]);
/// assert_eq!(stream.read_bytes(3)?.to_vec(), &[0b1011_0101, 0b0110_1010, 0b1010_1100]);
/// assert_eq!(stream.pos(), 24);
/// #
/// # Ok(())
@ -237,7 +239,7 @@ where
///
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
#[inline]
pub fn read_bytes(&mut self, byte_count: usize) -> Result<Vec<u8>> {
pub fn read_bytes(&mut self, byte_count: usize) -> Result<Cow<'a, [u8]>> {
let count = byte_count * 8;
let result = self.buffer.read_bytes(self.pos, byte_count);
if result.is_ok() {
@ -248,7 +250,7 @@ where
#[doc(hidden)]
#[inline]
pub unsafe fn read_bytes_unchecked(&mut self, byte_count: usize) -> Vec<u8> {
pub unsafe fn read_bytes_unchecked(&mut self, byte_count: usize) -> Cow<'a, [u8]> {
let count = byte_count * 8;
let result = self.buffer.read_bytes_unchecked(self.pos, byte_count);
self.pos += count;
@ -298,7 +300,7 @@ where
/// [`ReadError::NotEnoughData`]: enum.ReadError.html#variant.NotEnoughData
/// [`ReadError::Utf8Error`]: enum.ReadError.html#variant.Utf8Error
#[inline]
pub fn read_string(&mut self, byte_len: Option<usize>) -> Result<String> {
pub fn read_string(&mut self, byte_len: Option<usize>) -> Result<Cow<'a, str>> {
let max_length = self.bits_left() / 8;
let result = self.buffer.read_string(self.pos, byte_len).map_err(|err| {
@ -306,7 +308,7 @@ where
if let BitError::Utf8Error(err) = &err {
self.pos += match byte_len {
Some(len) => len * 8,
None => min((err.as_bytes().len() + 1) * 8, max_length),
None => min((err.valid_up_to() + 1) * 8, max_length),
};
}
err
@ -329,7 +331,7 @@ where
acc.push(c);
}
self.pos += acc.len() * 8;
return Ok(acc);
return Ok(Cow::Owned(acc));
}
self.pos += read;
Ok(result)