mirror of
https://codeberg.org/icewind/logsmash.git
synced 2026-06-03 18:14:11 +02:00
parallel matching
This commit is contained in:
parent
2bb7f2b47f
commit
e9b5807127
6 changed files with 97 additions and 61 deletions
|
|
@@ -4,7 +4,6 @@ use crate::matcher::MatchResult;
|
|||
use crate::timegraph::TimeGraph;
|
||||
use logsmash_data::StatementList;
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::Mutex;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
pub struct App {
|
||||
|
|
@@ -16,7 +15,7 @@ pub struct App {
|
|||
pub error_count: usize,
|
||||
pub all: LogMatch,
|
||||
pub unmatched: LogMatch,
|
||||
pub log_file: Mutex<LogFile>,
|
||||
pub log_file: LogFile,
|
||||
}
|
||||
|
||||
impl App {
|
||||
|
|
@@ -29,8 +28,8 @@ impl App {
|
|||
self.matches.len() + 1 + unmatched_line_count
|
||||
}
|
||||
|
||||
pub fn get_line(&self, index: usize) -> Option<String> {
|
||||
self.log_file.lock().unwrap().nth(index)
|
||||
pub fn get_line(&self, index: usize) -> Option<&str> {
|
||||
self.log_file.nth(index)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -1,17 +1,15 @@
|
|||
use crate::error::ReadError;
|
||||
use itertools::Either;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, Seek};
|
||||
use std::io::Read;
|
||||
use zip::ZipArchive;
|
||||
|
||||
pub enum LogFile {
|
||||
Plain(BufReader<File>),
|
||||
Zip(ZipArchive<File>),
|
||||
pub struct LogFile {
|
||||
content: String,
|
||||
}
|
||||
|
||||
impl LogFile {
|
||||
pub fn open(path: &str) -> Result<LogFile, ReadError> {
|
||||
let file = File::open(path)?;
|
||||
let mut file = File::open(path)?;
|
||||
if path.ends_with(".zip") {
|
||||
let mut zip = ZipArchive::new(file)?;
|
||||
if zip.len() > 1 {
|
||||
|
|
@@ -19,40 +17,25 @@ impl LogFile {
|
|||
} else if zip.is_empty() {
|
||||
return Err(ReadError::NoFiles);
|
||||
}
|
||||
// ensure we can open the file
|
||||
let _ = zip.by_index(0)?;
|
||||
|
||||
Ok(LogFile::Zip(zip))
|
||||
let mut log = zip.by_index(0)?;
|
||||
let mut content = String::with_capacity(log.size() as usize);
|
||||
log.read_to_string(&mut content)?;
|
||||
|
||||
Ok(LogFile { content })
|
||||
} else {
|
||||
Ok(LogFile::Plain(BufReader::new(file)))
|
||||
let mut content = String::new();
|
||||
file.read_to_string(&mut content)?;
|
||||
|
||||
Ok(LogFile { content })
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter(&mut self) -> impl Iterator<Item = String> + '_ {
|
||||
match self {
|
||||
LogFile::Plain(file) => Either::Left(file.lines().flatten()),
|
||||
LogFile::Zip(zip) => {
|
||||
let file = zip.by_index(0).expect("failed to open zip content again");
|
||||
Either::Right(BufReader::new(file).lines().flatten())
|
||||
}
|
||||
}
|
||||
pub fn iter<'a>(&'a self) -> impl Iterator<Item = &'a str> + Send + 'a {
|
||||
self.content.lines()
|
||||
}
|
||||
|
||||
pub fn nth(&mut self, index: usize) -> Option<String> {
|
||||
match self {
|
||||
LogFile::Plain(file) => {
|
||||
file.rewind().unwrap();
|
||||
file.lines().nth(index).transpose().ok().flatten()
|
||||
}
|
||||
LogFile::Zip(zip) => {
|
||||
let file = zip.by_index(0).expect("failed to open zip content again");
|
||||
BufReader::new(file)
|
||||
.lines()
|
||||
.nth(index)
|
||||
.transpose()
|
||||
.ok()
|
||||
.flatten()
|
||||
}
|
||||
}
|
||||
pub fn nth(&self, index: usize) -> Option<&str> {
|
||||
self.iter().nth(index)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
59
src/main.rs
59
src/main.rs
|
|
@@ -8,10 +8,11 @@ use base64::prelude::*;
|
|||
use clap::Parser;
|
||||
use logsmash_data::{default_apps, get_statements, SourceDefinition};
|
||||
use main_error::MainResult;
|
||||
use rayon::prelude::ParallelBridge;
|
||||
use rayon::prelude::*;
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::iter::once;
|
||||
use std::sync::Mutex;
|
||||
|
||||
mod app;
|
||||
mod error;
|
||||
|
|
@@ -32,7 +33,7 @@ struct Args {
|
|||
fn main() -> MainResult {
|
||||
let args = Args::parse();
|
||||
|
||||
let mut log_file = LogFile::open(&args.file).map_err(|err| LogError::Read {
|
||||
let log_file = LogFile::open(&args.file).map_err(|err| LogError::Read {
|
||||
err,
|
||||
path: args.file,
|
||||
})?;
|
||||
|
|
@@ -59,32 +60,44 @@ fn main() -> MainResult {
|
|||
let matcher = Matcher::new(&statements);
|
||||
|
||||
let lines = once(first).chain(lines);
|
||||
|
||||
let results: Vec<_> = lines
|
||||
.enumerate()
|
||||
.par_bridge()
|
||||
.filter(|(_, line)| line.starts_with('{'))
|
||||
.map(|(index, line)| {
|
||||
let mut parsed = serde_json::from_str::<LogLine>(&line)?;
|
||||
parsed.index = index;
|
||||
let log_match = matcher.match_log(&parsed);
|
||||
Result::<_, serde_json::Error>::Ok((parsed, log_match))
|
||||
})
|
||||
.collect();
|
||||
|
||||
let mut error_count = 0;
|
||||
let mut unmatched_counts: HashMap<String, Vec<usize>> = HashMap::new();
|
||||
let mut parsed_lines = Vec::with_capacity(1024);
|
||||
let mut unmatched_lines = Vec::with_capacity(256);
|
||||
let mut parsed_index = 0;
|
||||
for (index, line) in lines.enumerate() {
|
||||
if line.starts_with('{') {
|
||||
let mut parsed = match serde_json::from_str::<LogLine>(&line) {
|
||||
Ok(parsed) => parsed,
|
||||
Err(_) => {
|
||||
error_count += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
parsed.index = index;
|
||||
if let Some(index) = matcher.match_log(&parsed) {
|
||||
counts.entry(index).or_default().push(parsed_index);
|
||||
} else if let Some(entry) = unmatched_counts.get_mut(parsed.app.as_str()) {
|
||||
entry.push(parsed_index)
|
||||
} else {
|
||||
unmatched_lines.push(parsed_index);
|
||||
|
||||
for result in results {
|
||||
let parsed = match result {
|
||||
Ok((parsed, Some(match_result))) => {
|
||||
counts.entry(match_result).or_default().push(parsed_index);
|
||||
parsed
|
||||
}
|
||||
parsed_lines.push(parsed);
|
||||
parsed_index += 1;
|
||||
}
|
||||
Ok((parsed, None)) => {
|
||||
unmatched_lines.push(parsed_index);
|
||||
parsed
|
||||
}
|
||||
Err(_) => {
|
||||
error_count += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
|
||||
parsed_lines.push(parsed);
|
||||
parsed_index += 1;
|
||||
}
|
||||
parsed_lines.sort_by_key(|line| line.index);
|
||||
|
||||
let mut matched_lines: Vec<(_, _)> = counts.into_iter().collect();
|
||||
matched_lines.sort_by_key(|(_, lines)| lines.len());
|
||||
|
|
@@ -111,7 +124,7 @@ fn main() -> MainResult {
|
|||
unmatched,
|
||||
all,
|
||||
error_count,
|
||||
log_file: Mutex::new(log_file),
|
||||
log_file,
|
||||
};
|
||||
|
||||
if args.profile {
|
||||
|
|
|
|||
|
|
@@ -38,7 +38,7 @@ pub struct Matcher {
|
|||
|
||||
impl Matcher {
|
||||
pub fn new(statements: &StatementList) -> Matcher {
|
||||
let mut matches: Vec<_> = statements
|
||||
let matches: Vec<_> = statements
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(index, statement)| LogMatch::new(index, statement))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue