mirror of
https://codeberg.org/icewind/logsmash.git
synced 2026-06-03 18:14:11 +02:00
tar(.gz) support
This commit is contained in:
parent
2a76bb44a9
commit
7a524ae1d4
6 changed files with 287 additions and 56 deletions
|
|
@ -1,3 +1,4 @@
|
|||
use std::string::FromUtf8Error;
|
||||
use thiserror::Error;
|
||||
use zip::result::ZipError;
|
||||
|
||||
|
|
@ -25,4 +26,6 @@ pub enum ReadError {
|
|||
MultipleFiles,
|
||||
#[error("archive contains no files")]
|
||||
NoFiles,
|
||||
#[error("log file contained non-utf8 characters: {0:#}")]
|
||||
Utf8(#[from] FromUtf8Error),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,53 +0,0 @@
|
|||
use crate::error::ReadError;
|
||||
use flate2::read::GzDecoder;
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
use zip::ZipArchive;
|
||||
|
||||
pub struct LogFile {
|
||||
content: String,
|
||||
}
|
||||
|
||||
impl LogFile {
|
||||
pub fn open(path: &str) -> Result<LogFile, ReadError> {
|
||||
let mut file = File::open(path)?;
|
||||
if path.ends_with(".zip") {
|
||||
let mut zip = ZipArchive::new(file)?;
|
||||
let files: Vec<_> = zip
|
||||
.file_names()
|
||||
.enumerate()
|
||||
.filter(|(_, name)| !name.starts_with("__MACOSX"))
|
||||
.collect();
|
||||
if files.len() > 1 {
|
||||
return Err(ReadError::MultipleFiles);
|
||||
} else if files.is_empty() {
|
||||
return Err(ReadError::NoFiles);
|
||||
}
|
||||
|
||||
let mut log = zip.by_index(files[0].0)?;
|
||||
let mut content = String::with_capacity(log.size() as usize);
|
||||
log.read_to_string(&mut content)?;
|
||||
|
||||
Ok(LogFile { content })
|
||||
} else if path.ends_with(".gz") {
|
||||
let mut decoder = GzDecoder::new(file);
|
||||
let mut content = String::new();
|
||||
decoder.read_to_string(&mut content)?;
|
||||
|
||||
Ok(LogFile { content })
|
||||
} else {
|
||||
let mut content = String::new();
|
||||
file.read_to_string(&mut content)?;
|
||||
|
||||
Ok(LogFile { content })
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> impl Iterator<Item = &str> + Send + '_ {
|
||||
self.content.lines()
|
||||
}
|
||||
|
||||
pub fn nth(&self, index: usize) -> Option<&str> {
|
||||
self.iter().nth(index)
|
||||
}
|
||||
}
|
||||
125
src/logfile/archive.rs
Normal file
125
src/logfile/archive.rs
Normal file
|
|
@ -0,0 +1,125 @@
|
|||
use crate::error::ReadError;
|
||||
use itertools::Either;
|
||||
use std::borrow::Cow;
|
||||
use std::io::{Read, Seek};
|
||||
use std::iter::empty;
|
||||
use std::sync::Mutex;
|
||||
|
||||
pub trait Archive {
|
||||
type Entry<'a>: ArchiveEntry
|
||||
where
|
||||
Self: 'a;
|
||||
|
||||
fn entries(&mut self) -> impl Iterator<Item = Self::Entry<'_>>;
|
||||
}
|
||||
|
||||
pub trait ArchiveEntry {
|
||||
fn name(&self) -> Cow<str>;
|
||||
|
||||
fn extract(self) -> Result<Vec<u8>, ReadError>;
|
||||
}
|
||||
|
||||
fn read_to_vec<R: Read>(size: usize, mut reader: R) -> Result<Vec<u8>, ReadError> {
|
||||
let mut buff = Vec::with_capacity(size.max(GB));
|
||||
reader.read_to_end(&mut buff)?;
|
||||
Ok(buff)
|
||||
}
|
||||
|
||||
const GB: usize = 1_073_741_824;
|
||||
|
||||
pub struct ZipArchive<R>(Mutex<zip::ZipArchive<R>>);
|
||||
|
||||
impl<R: Read + Seek> ZipArchive<R> {
|
||||
pub fn new(reader: R) -> Result<Self, ReadError> {
|
||||
Ok(Self(Mutex::new(zip::ZipArchive::new(reader)?)))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ZipEntry<'a, R> {
|
||||
id: usize,
|
||||
pub path: String,
|
||||
archive: &'a ZipArchive<R>,
|
||||
}
|
||||
|
||||
impl<R: Read + Seek> ZipArchive<R> {
|
||||
fn extract(&self, id: usize) -> Result<Vec<u8>, ReadError> {
|
||||
let mut archive = self.0.lock().unwrap();
|
||||
let file = archive.by_index(id)?;
|
||||
read_to_vec(file.size() as usize, file)
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read + Seek> ArchiveEntry for ZipEntry<'_, R> {
|
||||
fn name(&self) -> Cow<str> {
|
||||
self.path.as_str().into()
|
||||
}
|
||||
|
||||
fn extract(self) -> Result<Vec<u8>, ReadError> {
|
||||
self.archive.extract(self.id)
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read + Seek> Archive for ZipArchive<R> {
|
||||
type Entry<'a> = ZipEntry<'a, R> where R: 'a;
|
||||
|
||||
fn entries(&mut self) -> impl Iterator<Item = Self::Entry<'_>> {
|
||||
let names = self
|
||||
.0
|
||||
.lock()
|
||||
.unwrap()
|
||||
.file_names()
|
||||
.map(String::from)
|
||||
.collect::<Vec<_>>();
|
||||
names.into_iter().enumerate().map(|(id, path)| Self::Entry {
|
||||
id,
|
||||
path,
|
||||
archive: self,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TarArchive<R: Read>(tar::Archive<R>);
|
||||
|
||||
impl<R: Read> TarArchive<R> {
|
||||
pub fn new(reader: R) -> Result<Self, ReadError> {
|
||||
Ok(Self(tar::Archive::new(reader)))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct TarEntry {
|
||||
name: String,
|
||||
content: Vec<u8>,
|
||||
}
|
||||
|
||||
impl TarEntry {
|
||||
pub fn new<R: Read>(entry: tar::Entry<R>) -> Result<Self, ReadError> {
|
||||
// work around tar "in-order" requirement by just caching everything :(
|
||||
let name = match entry.path() {
|
||||
Ok(path) => path.display().to_string(),
|
||||
_ => "invalid path".into(),
|
||||
};
|
||||
let content = read_to_vec(entry.size() as usize, entry)?;
|
||||
Ok(TarEntry { name, content })
|
||||
}
|
||||
}
|
||||
|
||||
impl ArchiveEntry for TarEntry {
|
||||
fn name(&self) -> Cow<str> {
|
||||
self.name.as_str().into()
|
||||
}
|
||||
|
||||
fn extract(self) -> Result<Vec<u8>, ReadError> {
|
||||
Ok(self.content)
|
||||
}
|
||||
}
|
||||
|
||||
impl<R: Read> Archive for TarArchive<R> {
|
||||
type Entry<'a> = TarEntry where R: 'a;
|
||||
|
||||
fn entries(&mut self) -> impl Iterator<Item = Self::Entry<'_>> {
|
||||
match self.0.entries() {
|
||||
Ok(iter) => Either::Left(iter.flatten().flat_map(TarEntry::new)),
|
||||
_ => Either::Right(empty()),
|
||||
}
|
||||
}
|
||||
}
|
||||
71
src/logfile/mod.rs
Normal file
71
src/logfile/mod.rs
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
mod archive;
|
||||
|
||||
use crate::error::ReadError;
|
||||
use crate::logfile::archive::{Archive, ArchiveEntry, TarArchive, ZipArchive};
|
||||
use flate2::read::GzDecoder;
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
|
||||
pub struct LogFile {
|
||||
content: String,
|
||||
}
|
||||
|
||||
impl LogFile {
|
||||
pub fn open(path: &str) -> Result<LogFile, ReadError> {
|
||||
let file = File::open(path)?;
|
||||
if path.ends_with(".zip") {
|
||||
let mut zip = ZipArchive::new(file)?;
|
||||
let content = select_file(&mut zip)?;
|
||||
|
||||
return Ok(LogFile { content });
|
||||
}
|
||||
|
||||
if let Some(path) = path.strip_suffix(".gz") {
|
||||
let decoder = GzDecoder::new(file);
|
||||
return Self::open_no_seek(path, decoder);
|
||||
}
|
||||
|
||||
Self::open_no_seek(path, Box::new(file))
|
||||
}
|
||||
|
||||
fn open_no_seek<R: Read>(path: &str, mut file: R) -> Result<LogFile, ReadError> {
|
||||
if path.ends_with(".tar") {
|
||||
let mut zip = TarArchive::new(file)?;
|
||||
let content = select_file(&mut zip)?;
|
||||
|
||||
Ok(LogFile { content })
|
||||
} else {
|
||||
let mut content = String::new();
|
||||
file.read_to_string(&mut content)?;
|
||||
|
||||
Ok(LogFile { content })
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> impl Iterator<Item = &str> + Send + '_ {
|
||||
self.content.lines()
|
||||
}
|
||||
|
||||
pub fn nth(&self, index: usize) -> Option<&str> {
|
||||
self.iter().nth(index)
|
||||
}
|
||||
}
|
||||
|
||||
fn select_file<A: Archive>(archive: &mut A) -> Result<String, ReadError> {
|
||||
let entry = {
|
||||
let mut entries = archive
|
||||
.entries()
|
||||
.filter(|entry| !entry.name().starts_with("__MACOSX"))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
// todo: present a picker instead
|
||||
if entries.len() > 1 {
|
||||
return Err(ReadError::MultipleFiles);
|
||||
} else if entries.is_empty() {
|
||||
return Err(ReadError::NoFiles);
|
||||
}
|
||||
entries.pop().unwrap()
|
||||
};
|
||||
let raw = entry.extract()?;
|
||||
Ok(String::from_utf8(raw)?)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue