support .zst, .xz and .bz2 compressed inputs
Some checks are pending
CI / matrix (push) Waiting to run
CI / ${{ matrix.check }} (push) Blocked by required conditions
CI / build (push) Blocked by required conditions
CI / build-nixpkgs (push) Blocked by required conditions

This commit is contained in:
Robin Appelman 2024-11-05 17:21:14 +01:00
commit 4f3d6a17ab
4 changed files with 80 additions and 3 deletions

58
Cargo.lock generated
View file

@ -117,6 +117,16 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "bzip2-rs"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "beeb59e7e4c811ab37cc73680c798c7a5da77fc9989c62b09138e31ee740f735"
dependencies = [
"crc32fast",
"tinyvec",
]
[[package]] [[package]]
name = "bzip2-sys" name = "bzip2-sys"
version = "0.1.11+1.0.8" version = "0.1.11+1.0.8"
@ -648,6 +658,7 @@ version = "0.1.5"
dependencies = [ dependencies = [
"ahash", "ahash",
"base64 0.22.1", "base64 0.22.1",
"bzip2-rs",
"clap", "clap",
"derive_more", "derive_more",
"flate2", "flate2",
@ -659,6 +670,7 @@ dependencies = [
"ratatui", "ratatui",
"rayon", "rayon",
"regex", "regex",
"ruzstd",
"serde", "serde",
"serde_json", "serde_json",
"tar", "tar",
@ -666,6 +678,7 @@ dependencies = [
"tikv-jemallocator", "tikv-jemallocator",
"time", "time",
"tinystr", "tinystr",
"xz2",
"zip", "zip",
] ]
@ -696,6 +709,17 @@ dependencies = [
"crc", "crc",
] ]
[[package]]
name = "lzma-sys"
version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]] [[package]]
name = "main_error" name = "main_error"
version = "0.1.2" version = "0.1.2"
@ -1003,6 +1027,15 @@ version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248"
[[package]]
name = "ruzstd"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "99c3938e133aac070997ddc684d4b393777d293ba170f2988c8fd5ea2ad4ce21"
dependencies = [
"twox-hash",
]
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.18" version = "1.0.18"
@ -1269,6 +1302,22 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "tinyvec"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "445e881f4f6d382d5f27c034e25eb92edd7c784ceab92a0937db7f2e9471b938"
[[package]]
name = "twox-hash"
version = "1.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
dependencies = [
"cfg-if",
"static_assertions",
]
[[package]] [[package]]
name = "typenum" name = "typenum"
version = "1.17.0" version = "1.17.0"
@ -1452,6 +1501,15 @@ dependencies = [
"rustix", "rustix",
] ]
[[package]]
name = "xz2"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
dependencies = [
"lzma-sys",
]
[[package]] [[package]]
name = "zerocopy" name = "zerocopy"
version = "0.7.35" version = "0.7.35"

View file

@ -14,8 +14,6 @@ regex = "1.10.5"
log = "0.4.22" log = "0.4.22"
clap = { version = "=4.1.3", features = ["derive"] } clap = { version = "=4.1.3", features = ["derive"] }
logsmash-data = { version = "0.1.0", path = "./data" } logsmash-data = { version = "0.1.0", path = "./data" }
zip = "2.1.5"
flate2 = "1.0.31"
itertools = "0.13.0" itertools = "0.13.0"
ratatui = "0.29.0" ratatui = "0.29.0"
tinystr = { version = "0.7.6", features = ["serde"] } tinystr = { version = "0.7.6", features = ["serde"] }
@ -26,6 +24,11 @@ base64 = "0.22.1"
derive_more = { version = "1.0.0-beta.6", features = ["from"] } derive_more = { version = "1.0.0-beta.6", features = ["from"] }
rayon = "1.10.0" rayon = "1.10.0"
tar = "0.4.42" tar = "0.4.42"
zip = "2.1.5"
flate2 = "1.0.31"
xz2 = "0.1.7"
bzip2-rs = "0.1.2"
ruzstd = "0.7.2"
[target.'cfg(not(target_os = "windows"))'.dependencies] [target.'cfg(not(target_os = "windows"))'.dependencies]
tikv-jemallocator = "0.6.0" tikv-jemallocator = "0.6.0"

View file

@ -1,3 +1,4 @@
use ruzstd::frame_decoder::FrameDecoderError;
use std::string::FromUtf8Error; use std::string::FromUtf8Error;
use thiserror::Error; use thiserror::Error;
use zip::result::ZipError; use zip::result::ZipError;
@ -22,6 +23,8 @@ pub enum ReadError {
Io(#[from] std::io::Error), Io(#[from] std::io::Error),
#[error(transparent)] #[error(transparent)]
Zip(#[from] ZipError), Zip(#[from] ZipError),
#[error(transparent)]
Zstd(#[from] FrameDecoderError),
#[error("archive contains multiple files")] #[error("archive contains multiple files")]
MultipleFiles, MultipleFiles,
#[error("archive contains no files")] #[error("archive contains no files")]

View file

@ -2,9 +2,12 @@ mod archive;
use crate::error::ReadError; use crate::error::ReadError;
use crate::logfile::archive::{Archive, ArchiveEntry, TarArchive, ZipArchive}; use crate::logfile::archive::{Archive, ArchiveEntry, TarArchive, ZipArchive};
use bzip2_rs::DecoderReader;
use flate2::read::GzDecoder; use flate2::read::GzDecoder;
use ruzstd::StreamingDecoder;
use std::fs::File; use std::fs::File;
use std::io::Read; use std::io::{BufReader, Read};
use xz2::read::XzDecoder;
pub struct LogFile { pub struct LogFile {
content: String, content: String,
@ -13,6 +16,7 @@ pub struct LogFile {
impl LogFile { impl LogFile {
pub fn open(path: &str) -> Result<LogFile, ReadError> { pub fn open(path: &str) -> Result<LogFile, ReadError> {
let file = File::open(path)?; let file = File::open(path)?;
let file = BufReader::new(file);
if path.ends_with(".zip") { if path.ends_with(".zip") {
let mut zip = ZipArchive::new(file)?; let mut zip = ZipArchive::new(file)?;
let content = select_file(&mut zip)?; let content = select_file(&mut zip)?;
@ -23,6 +27,15 @@ impl LogFile {
if let Some(path) = path.strip_suffix(".gz") { if let Some(path) = path.strip_suffix(".gz") {
let decoder = GzDecoder::new(file); let decoder = GzDecoder::new(file);
return Self::open_no_seek(path, decoder); return Self::open_no_seek(path, decoder);
} else if let Some(path) = path.strip_suffix(".xz") {
let decoder = XzDecoder::new(file);
return Self::open_no_seek(path, decoder);
} else if let Some(path) = path.strip_suffix(".bz2") {
let decoder = DecoderReader::new(file);
return Self::open_no_seek(path, decoder);
} else if let Some(path) = path.strip_suffix(".zst") {
let decoder = StreamingDecoder::new(file)?;
return Self::open_no_seek(path, decoder);
} }
Self::open_no_seek(path, Box::new(file)) Self::open_no_seek(path, Box::new(file))