mirror of
https://codeberg.org/icewind/logsmash.git
synced 2026-06-03 18:14:11 +02:00
tar(.gz) support
This commit is contained in:
parent
2a76bb44a9
commit
7a524ae1d4
6 changed files with 287 additions and 56 deletions
88
Cargo.lock
generated
88
Cargo.lock
generated
|
|
@ -402,6 +402,28 @@ version = "1.0.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
|
||||
|
||||
[[package]]
|
||||
name = "errno"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "filetime"
|
||||
version = "0.2.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"libredox",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.31"
|
||||
|
|
@ -540,9 +562,26 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.155"
|
||||
version = "0.2.159"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
|
||||
checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5"
|
||||
|
||||
[[package]]
|
||||
name = "libredox"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
|
||||
dependencies = [
|
||||
"bitflags 2.6.0",
|
||||
"libc",
|
||||
"redox_syscall",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "linux-raw-sys"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89"
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
|
|
@ -585,6 +624,7 @@ dependencies = [
|
|||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tar",
|
||||
"thiserror",
|
||||
"tikv-jemallocator",
|
||||
"time",
|
||||
|
|
@ -903,6 +943,19 @@ version = "0.8.4"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
|
||||
|
||||
[[package]]
|
||||
name = "rustix"
|
||||
version = "0.38.37"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811"
|
||||
dependencies = [
|
||||
"bitflags 2.6.0",
|
||||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.17"
|
||||
|
|
@ -1077,6 +1130,17 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tar"
|
||||
version = "0.4.42"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ff6c40d3aedb5e06b57c6f669ad17ab063dd1e63d977c6a88e7f4dfa4f04020"
|
||||
dependencies = [
|
||||
"filetime",
|
||||
"libc",
|
||||
"xattr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termcolor"
|
||||
version = "1.4.1"
|
||||
|
|
@ -1269,6 +1333,15 @@ dependencies = [
|
|||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.59.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
|
||||
dependencies = [
|
||||
"windows-targets 0.52.6",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-targets"
|
||||
version = "0.48.5"
|
||||
|
|
@ -1390,6 +1463,17 @@ version = "0.52.6"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
|
||||
|
||||
[[package]]
|
||||
name = "xattr"
|
||||
version = "1.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"linux-raw-sys",
|
||||
"rustix",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "zerocopy"
|
||||
version = "0.7.35"
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
name = "logsmash"
|
||||
version = "0.1.5"
|
||||
edition = "2021"
|
||||
rust-version = "1.74.0"
|
||||
rust-version = "1.75.0"
|
||||
license = "GPL-3"
|
||||
|
||||
[dependencies]
|
||||
|
|
@ -25,6 +25,7 @@ ahash = "0.8.11"
|
|||
base64 = "0.21.7"
|
||||
derive_more = { version = "1.0.0-beta.6", features = ["from"] }
|
||||
rayon = "1.10.0"
|
||||
tar = "0.4.42"
|
||||
|
||||
[target.'cfg(not(target_os = "windows"))'.dependencies]
|
||||
tikv-jemallocator = "0.6.0"
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
use std::string::FromUtf8Error;
|
||||
use thiserror::Error;
|
||||
use zip::result::ZipError;
|
||||
|
||||
|
|
@ -25,4 +26,6 @@ pub enum ReadError {
|
|||
MultipleFiles,
|
||||
#[error("archive contains no files")]
|
||||
NoFiles,
|
||||
#[error("log file contained non-utf8 characters: {0:#}")]
|
||||
Utf8(#[from] FromUtf8Error),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,53 +0,0 @@
|
|||
use crate::error::ReadError;
|
||||
use flate2::read::GzDecoder;
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
use zip::ZipArchive;
|
||||
|
||||
pub struct LogFile {
|
||||
content: String,
|
||||
}
|
||||
|
||||
impl LogFile {
|
||||
pub fn open(path: &str) -> Result<LogFile, ReadError> {
|
||||
let mut file = File::open(path)?;
|
||||
if path.ends_with(".zip") {
|
||||
let mut zip = ZipArchive::new(file)?;
|
||||
let files: Vec<_> = zip
|
||||
.file_names()
|
||||
.enumerate()
|
||||
.filter(|(_, name)| !name.starts_with("__MACOSX"))
|
||||
.collect();
|
||||
if files.len() > 1 {
|
||||
return Err(ReadError::MultipleFiles);
|
||||
} else if files.is_empty() {
|
||||
return Err(ReadError::NoFiles);
|
||||
}
|
||||
|
||||
let mut log = zip.by_index(files[0].0)?;
|
||||
let mut content = String::with_capacity(log.size() as usize);
|
||||
log.read_to_string(&mut content)?;
|
||||
|
||||
Ok(LogFile { content })
|
||||
} else if path.ends_with(".gz") {
|
||||
let mut decoder = GzDecoder::new(file);
|
||||
let mut content = String::new();
|
||||
decoder.read_to_string(&mut content)?;
|
||||
|
||||
Ok(LogFile { content })
|
||||
} else {
|
||||
let mut content = String::new();
|
||||
file.read_to_string(&mut content)?;
|
||||
|
||||
Ok(LogFile { content })
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> impl Iterator<Item = &str> + Send + '_ {
|
||||
self.content.lines()
|
||||
}
|
||||
|
||||
pub fn nth(&self, index: usize) -> Option<&str> {
|
||||
self.iter().nth(index)
|
||||
}
|
||||
}
|
||||
125
src/logfile/archive.rs
Normal file
125
src/logfile/archive.rs
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
use crate::error::ReadError;
|
||||
use itertools::Either;
|
||||
use std::borrow::Cow;
|
||||
use std::io::{Read, Seek};
|
||||
use std::iter::empty;
|
||||
use std::sync::Mutex;
|
||||
|
||||
pub trait Archive {
|
||||
type Entry<'a>: ArchiveEntry
|
||||
where
|
||||
Self: 'a;
|
||||
|
||||
fn entries(&mut self) -> impl Iterator<Item = Self::Entry<'_>>;
|
||||
}
|
||||
|
||||
pub trait ArchiveEntry {
|
||||
fn name(&self) -> Cow<str>;
|
||||
|
||||
fn extract(self) -> Result<Vec<u8>, ReadError>;
|
||||
}
|
||||
|
||||
fn read_to_vec<R: Read>(size: usize, mut reader: R) -> Result<Vec<u8>, ReadError> {
|
||||
let mut buff = Vec::with_capacity(size.max(GB));
|
||||
reader.read_to_end(&mut buff)?;
|
||||
Ok(buff)
|
||||
}
|
||||
|
||||
const GB: usize = 1_073_741_824;
|
||||
|
||||
pub struct ZipArchive<R>(Mutex<zip::ZipArchive<R>>);
|
||||
|
||||
impl<R: Read + Seek> ZipArchive<R> {
|
||||
pub fn new(reader: R) -> Result<Self, ReadError> {
|
||||
Ok(Self(Mutex::new(zip::ZipArchive::new(reader)?)))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ZipEntry<'a, R> {
|
||||
id: usize,
|
||||
pub path: String,
|
||||
archive: &'a ZipArchive<R>,
|
||||
}
|
||||
|
||||
impl<R: Read + Seek> ZipArchive<R> {
|
||||
fn extract(&self, id: usize) -> Result<Vec<u8>, ReadError> {
|
||||
let mut archive = self.0.lock().unwrap();
|
||||
let file = archive.by_index(id)?;
|
||||
read_to_vec(file.size() as usize, file)
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read + Seek> ArchiveEntry for ZipEntry<'_, R> {
|
||||
fn name(&self) -> Cow<str> {
|
||||
self.path.as_str().into()
|
||||
}
|
||||
|
||||
fn extract(self) -> Result<Vec<u8>, ReadError> {
|
||||
self.archive.extract(self.id)
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read + Seek> Archive for ZipArchive<R> {
|
||||
type Entry<'a> = ZipEntry<'a, R> where R: 'a;
|
||||
|
||||
fn entries(&mut self) -> impl Iterator<Item = Self::Entry<'_>> {
|
||||
let names = self
|
||||
.0
|
||||
.lock()
|
||||
.unwrap()
|
||||
.file_names()
|
||||
.map(String::from)
|
||||
.collect::<Vec<_>>();
|
||||
names.into_iter().enumerate().map(|(id, path)| Self::Entry {
|
||||
id,
|
||||
path,
|
||||
archive: self,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TarArchive<R: Read>(tar::Archive<R>);
|
||||
|
||||
impl<R: Read> TarArchive<R> {
|
||||
pub fn new(reader: R) -> Result<Self, ReadError> {
|
||||
Ok(Self(tar::Archive::new(reader)))
|
||||
}
|
||||
}
|
||||
|
||||
/// A tar member whose name and contents have already been read into memory.
pub struct TarEntry {
    /// Path of the member inside the archive, rendered as text.
    name: String,
    /// Complete contents of the member.
    content: Vec<u8>,
}
|
||||
|
||||
impl TarEntry {
|
||||
pub fn new<R: Read>(entry: tar::Entry<R>) -> Result<Self, ReadError> {
|
||||
// work around tar "in-order" requirement by just caching everything :(
|
||||
let name = match entry.path() {
|
||||
Ok(path) => path.display().to_string(),
|
||||
_ => "invalid path".into(),
|
||||
};
|
||||
let content = read_to_vec(entry.size() as usize, entry)?;
|
||||
Ok(TarEntry { name, content })
|
||||
}
|
||||
}
|
||||
|
||||
impl ArchiveEntry for TarEntry {
|
||||
fn name(&self) -> Cow<str> {
|
||||
self.name.as_str().into()
|
||||
}
|
||||
|
||||
fn extract(self) -> Result<Vec<u8>, ReadError> {
|
||||
Ok(self.content)
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read> Archive for TarArchive<R> {
|
||||
type Entry<'a> = TarEntry where R: 'a;
|
||||
|
||||
fn entries(&mut self) -> impl Iterator<Item = Self::Entry<'_>> {
|
||||
match self.0.entries() {
|
||||
Ok(iter) => Either::Left(iter.flatten().flat_map(TarEntry::new)),
|
||||
_ => Either::Right(empty()),
|
||||
}
|
||||
}
|
||||
}
|
||||
71
src/logfile/mod.rs
Normal file
71
src/logfile/mod.rs
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
mod archive;
|
||||
|
||||
use crate::error::ReadError;
|
||||
use crate::logfile::archive::{Archive, ArchiveEntry, TarArchive, ZipArchive};
|
||||
use flate2::read::GzDecoder;
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
|
||||
pub struct LogFile {
|
||||
content: String,
|
||||
}
|
||||
|
||||
impl LogFile {
|
||||
pub fn open(path: &str) -> Result<LogFile, ReadError> {
|
||||
let file = File::open(path)?;
|
||||
if path.ends_with(".zip") {
|
||||
let mut zip = ZipArchive::new(file)?;
|
||||
let content = select_file(&mut zip)?;
|
||||
|
||||
return Ok(LogFile { content });
|
||||
}
|
||||
|
||||
if let Some(path) = path.strip_suffix(".gz") {
|
||||
let decoder = GzDecoder::new(file);
|
||||
return Self::open_no_seek(path, decoder);
|
||||
}
|
||||
|
||||
Self::open_no_seek(path, Box::new(file))
|
||||
}
|
||||
|
||||
fn open_no_seek<R: Read>(path: &str, mut file: R) -> Result<LogFile, ReadError> {
|
||||
if path.ends_with(".tar") {
|
||||
let mut zip = TarArchive::new(file)?;
|
||||
let content = select_file(&mut zip)?;
|
||||
|
||||
Ok(LogFile { content })
|
||||
} else {
|
||||
let mut content = String::new();
|
||||
file.read_to_string(&mut content)?;
|
||||
|
||||
Ok(LogFile { content })
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> impl Iterator<Item = &str> + Send + '_ {
|
||||
self.content.lines()
|
||||
}
|
||||
|
||||
pub fn nth(&self, index: usize) -> Option<&str> {
|
||||
self.iter().nth(index)
|
||||
}
|
||||
}
|
||||
|
||||
fn select_file<A: Archive>(archive: &mut A) -> Result<String, ReadError> {
|
||||
let entry = {
|
||||
let mut entries = archive
|
||||
.entries()
|
||||
.filter(|entry| !entry.name().starts_with("__MACOSX"))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// todo: present a picker instead
|
||||
if entries.len() > 1 {
|
||||
return Err(ReadError::MultipleFiles);
|
||||
} else if entries.is_empty() {
|
||||
return Err(ReadError::NoFiles);
|
||||
}
|
||||
entries.pop().unwrap()
|
||||
};
|
||||
let raw = entry.extract()?;
|
||||
Ok(String::from_utf8(raw)?)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue