1
0
Fork 0
mirror of https://codeberg.org/icewind/bitbuffer.git synced 2026-06-03 16:44:06 +02:00

add string reading

This commit is contained in:
Robin Appelman 2019-02-23 17:52:28 +01:00
commit 79bc6d5914
4 changed files with 161 additions and 56 deletions

View file

@ -321,8 +321,11 @@ impl<'a, E, S> BitBuffer<'a, E, S>
/// 0b1001_1001, 0b1001_1001, 0b1001_1001, 0b1110_0111
/// ];
/// let buffer = BitBuffer::new(bytes, LittleEndian);
/// let bytes = buffer.read_bytes(5, 3).unwrap();
/// assert_eq!(bytes, &[0b0_1010_101, 0b0_1100_011, 0b1_1001_101]);
/// assert_eq!(buffer.read_bytes(5, 3).unwrap(), &[0b0_1010_101, 0b0_1100_011, 0b1_1001_101]);
/// assert_eq!(buffer.read_bytes(0, 8).unwrap(), &[
/// 0b1011_0101, 0b0110_1010, 0b1010_1100, 0b1001_1001,
/// 0b1001_1001, 0b1001_1001, 0b1001_1001, 0b1110_0111
/// ]);
/// ```
pub fn read_bytes(&self, position: usize, byte_count: usize) -> Result<Vec<u8>> {
let mut data = vec![];
@ -336,11 +339,60 @@ impl<'a, E, S> BitBuffer<'a, E, S>
let usable_bytes = &bytes[0..read];
data.extend_from_slice(usable_bytes);
byte_left -= read;
read_pos += read;
read_pos += read * 8;
}
Ok(data)
}
/// Decode a run of bytes starting at the given bit position into a `String`
///
/// With `Some(len)` exactly `len` bytes are read; with `None` bytes are read one at a time
/// until (and including) the first null byte. In both cases any trailing null bytes are
/// stripped from the returned string.
///
/// # Errors
///
/// - [`ReadError::NotEnoughData`](enum.ReadError.html#variant.NotEnoughData): not enough bits available in the buffer
/// - [`ReadError::Utf8Error`](enum.ReadError.html#variant.Utf8Error): the read bytes are not valid utf8
///
/// # Examples
///
/// ```
/// use bitstream_reader::{BitBuffer, BitStream, LittleEndian};
///
/// let bytes: &[u8] = &[
///     0x48, 0x65, 0x6c, 0x6c,
///     0x6f, 0x20, 0x77, 0x6f,
///     0x72, 0x6c, 0x64, 0,
///     0,    0,    0,    0
/// ];
/// let buffer = BitBuffer::new(bytes, LittleEndian);
/// // Fixed length string
/// assert_eq!(buffer.read_string(0, Some(13)).unwrap(), "Hello world".to_owned());
/// // fixed length with null padding
/// assert_eq!(buffer.read_string(0, Some(16)).unwrap(), "Hello world".to_owned());
/// // null terminated
/// assert_eq!(buffer.read_string(0, None).unwrap(), "Hello world".to_owned());
/// ```
pub fn read_string(&self, position: usize, byte_len: Option<usize>) -> Result<String> {
    let raw = if let Some(len) = byte_len {
        // fixed length: a single bulk read
        self.read_bytes(position, len)?
    } else {
        // null terminated: walk forward one byte (8 bits) at a time
        let mut collected = vec![];
        let mut cursor = position;
        loop {
            let current = self.read(cursor, 8)?;
            collected.push(current);
            if current == 0 {
                // the terminator itself is kept here and stripped below
                break;
            }
            cursor += 8;
        }
        collected
    };
    let decoded = String::from_utf8(raw)?;
    let trimmed = decoded.trim_end_matches('\0');
    Ok(String::from(trimmed))
}
/// Read a sequence of bits from the buffer as float
///
/// # Errors

View file

@ -1,24 +1,25 @@
#![warn(missing_docs)]
#![feature(test)]
//#![feature(test)]
//! Tools for reading integers of arbitrary bit length and non byte-aligned integers and other data types
// for bench on nightly
extern crate test;
//extern crate test;
pub use buffer::{BitBuffer, IsPadded};
pub use stream::BitStream;
pub use endianness::*;
pub use std::string::FromUtf8Error;
pub use stream::BitStream;
mod buffer;
mod stream;
mod endianness;
mod is_signed;
mod stream;
#[cfg(test)]
mod tests;
/// Errors that can be returned when trying to read from a buffer
#[derive(Debug, PartialEq, Copy, Clone)]
#[derive(Debug)]
pub enum ReadError {
/// Too many bits requested to fit in the requested data type
TooManyBits {
@ -41,6 +42,14 @@ pub enum ReadError {
/// the number of bits in the buffer
size: usize,
},
/// The read slice of bytes is not valid UTF-8
Utf8Error(FromUtf8Error),
}
/// Lets `?` turn a failed utf8 conversion into a [`ReadError::Utf8Error`](enum.ReadError.html#variant.Utf8Error)
impl From<FromUtf8Error> for ReadError {
    fn from(source: FromUtf8Error) -> Self {
        ReadError::Utf8Error(source)
    }
}
/// Either the read bits in the requested format or a [`ReadError`](enum.ReadError.html)

View file

@ -1,11 +1,11 @@
use crate::endianness::Endianness;
use crate::is_signed::IsSigned;
use crate::{ReadError, Result};
use crate::BitBuffer;
use crate::buffer::IsPadded;
use crate::endianness::Endianness;
use crate::is_signed::IsSigned;
use num_traits::{Float, PrimInt};
use std::mem::size_of;
use std::ops::BitOrAssign;
use crate::buffer::IsPadded;
/// Stream that provides an easy way to iterate through a BitBuffer
///
@ -192,8 +192,7 @@ impl<'a, E, S> BitStream<'a, E, S>
/// ];
/// let buffer = BitBuffer::new(bytes, LittleEndian);
/// let mut stream = BitStream::new(&buffer, None, None);
/// let bytes = stream.read_bytes(3).unwrap();
/// assert_eq!(bytes, &[0b1011_0101, 0b0110_1010, 0b1010_1100]);
/// assert_eq!(stream.read_bytes(3).unwrap(), &[0b1011_0101, 0b0110_1010, 0b1010_1100]);
/// assert_eq!(stream.pos(), 24);
/// ```
pub fn read_bytes(&mut self, byte_count: usize) -> Result<Vec<u8>> {
@ -207,6 +206,51 @@ impl<'a, E, S> BitStream<'a, E, S>
result
}
/// Read a series of bytes from the stream as utf8 string
///
/// You can either read a fixed number of bytes, or a dynamic length null-terminated string
///
/// On success the stream position is advanced by the number of *bits* consumed:
/// `byte_len * 8` for a fixed length read, or eight times the string's byte length plus one
/// (for the terminating null byte) for a null-terminated read.
/// If the read fails the position is left unchanged.
///
/// # Errors
///
/// - [`ReadError::NotEnoughData`](enum.ReadError.html#variant.NotEnoughData): not enough bits available in the buffer
/// - [`ReadError::Utf8Error`](enum.ReadError.html#variant.Utf8Error): the read bytes are not valid utf8
///
/// # Examples
///
/// ```
/// use bitstream_reader::{BitBuffer, BitStream, LittleEndian};
///
/// let bytes: &[u8] = &[
///     0x48, 0x65, 0x6c, 0x6c,
///     0x6f, 0x20, 0x77, 0x6f,
///     0x72, 0x6c, 0x64, 0,
///     0,    0,    0,    0
/// ];
/// let buffer = BitBuffer::new(bytes, LittleEndian);
/// let mut stream = BitStream::new(&buffer, None, None);
/// // Fixed length string
/// stream.set_pos(0);
/// assert_eq!(stream.read_string(Some(13)).unwrap(), "Hello world".to_owned());
/// assert_eq!(13 * 8, stream.pos());
/// // fixed length with null padding
/// stream.set_pos(0);
/// assert_eq!(stream.read_string(Some(16)).unwrap(), "Hello world".to_owned());
/// assert_eq!(16 * 8, stream.pos());
/// // null terminated
/// stream.set_pos(0);
/// assert_eq!(stream.read_string(None).unwrap(), "Hello world".to_owned());
/// assert_eq!(12 * 8, stream.pos()); // 1 more byte for the terminating null byte
/// ```
pub fn read_string(&mut self, byte_len: Option<usize>) -> Result<String> {
    let result = self.buffer.read_string(self.pos, byte_len)?;
    // The returned string has its trailing nulls trimmed, so for a null-terminated read
    // the terminator has to be counted separately.
    let bytes_read = match byte_len {
        Some(len) => len,
        None => result.len() + 1,
    };
    // `pos` is a bit offset while the lengths above are in bytes
    self.pos += bytes_read * 8;
    Ok(result)
}
/// Read a sequence of bits from the stream as a BitStream
///
/// # Errors

View file

@ -1,7 +1,7 @@
use super::*;
// for bench on nightly
use std::fs;
use test::Bencher;
//use std::fs;
//use test::Bencher;
const BYTES: &'static [u8] = &[
0b1011_0101,
@ -224,43 +224,43 @@ fn read_f64_le() {
}
// for bench on nightly
/// Walk the whole buffer in 5 bit steps and fold every value read into a wrapping `u16` sum
///
/// Stops as soon as fewer than 5 bits remain and returns the accumulated sum.
/// Panics if any individual read fails.
fn read_perf<P: IsPadded>(buffer: BitBuffer<LittleEndian, P>) -> u16 {
    let size = 5;
    let len = buffer.bit_len();
    let mut checksum: u16 = 0;
    let mut offset = 0;
    while offset + size <= len {
        let value = buffer.read::<u16>(offset, size).unwrap();
        checksum = checksum.wrapping_add(value);
        offset += size;
    }
    checksum
}
// Benchmark: run `read_perf` over a buffer created with `BitBuffer::from_padded_slice`
//
// Needs the nightly `test` crate and the file "/bulk/tmp/test.dem"; panics if it can't be read.
#[bench]
fn perf_padded(b: &mut Bencher) {
let mut file = fs::read("/bulk/tmp/test.dem").expect("Unable to read file");
// remember the length of the real data before the padding is appended
let len = file.len();
// append 8 zero bytes of padding after the data
file.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0]);
let bytes = file.as_slice();
b.iter(|| {
let buffer = BitBuffer::from_padded_slice(&bytes, len, LittleEndian);
let data = read_perf(buffer);
// expected sum for this specific input file
assert_eq!(data, 43943);
// keep the optimizer from discarding the computed value
test::black_box(data);
});
}
// Benchmark: run `read_perf` over a buffer created with `BitBuffer::new`, without extra padding
//
// Needs the nightly `test` crate and the file "/bulk/tmp/test.dem"; panics if it can't be read.
#[bench]
fn perf_non_padded(b: &mut Bencher) {
let file = fs::read("/bulk/tmp/test.dem").expect("Unable to read file");
let bytes = file.as_slice();
b.iter(|| {
let buffer = BitBuffer::new(&bytes, LittleEndian);
let data = read_perf(buffer);
// expected sum for this specific input file, same value as asserted in `perf_padded`
assert_eq!(data, 43943);
// keep the optimizer from discarding the computed value
test::black_box(data);
});
}
//fn read_perf<P: IsPadded>(buffer: BitBuffer<LittleEndian, P>) -> u16 {
// let size = 5;
// let mut pos = 0;
// let len = buffer.bit_len();
// let mut result: u16 = 0;
// loop {
// if pos + size > len {
// return result;
// }
// let data = buffer.read::<u16>(pos, size).unwrap();
// result = result.wrapping_add(data);
// pos += size;
// }
//}
//
//#[bench]
//fn perf_padded(b: &mut Bencher) {
// let mut file = fs::read("/bulk/tmp/test.dem").expect("Unable to read file");
// let len = file.len();
// file.extend_from_slice(&[0, 0, 0, 0, 0, 0, 0, 0]);
// let bytes = file.as_slice();
// b.iter(|| {
// let buffer = BitBuffer::from_padded_slice(&bytes, len, LittleEndian);
// let data = read_perf(buffer);
// assert_eq!(data, 43943);
// test::black_box(data);
// });
//}
//
//#[bench]
//fn perf_non_padded(b: &mut Bencher) {
// let file = fs::read("/bulk/tmp/test.dem").expect("Unable to read file");
// let bytes = file.as_slice();
// b.iter(|| {
// let buffer = BitBuffer::new(&bytes, LittleEndian);
// let data = read_perf(buffer);
// assert_eq!(data, 43943);
// test::black_box(data);
// });
//}