mirror of
https://codeberg.org/icewind/logsmash.git
synced 2026-06-03 18:14:11 +02:00
improve matching
This commit is contained in:
parent
04e391aea1
commit
9413b216ba
24 changed files with 73837 additions and 21689 deletions
20
src/error.rs
Normal file
20
src/error.rs
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
use thiserror::Error;
|
||||
use zip::result::ZipError;
|
||||
|
||||
/// Error carrying a [`ReadError`] together with the path of the input file
/// it relates to.
#[derive(Debug, Error)]
|
||||
pub enum LogError {
|
||||
/// Reading an input file failed. The message interpolates `path` and
/// renders `err` with the alternate (`{:#}`) format specifier.
#[error("Error while reading input file '{path}': {err:#}")]
|
||||
Read { err: ReadError, path: String },
|
||||
}
|
||||
|
||||
/// Failure modes of `LogFile::open`, which returns this type as its error.
#[derive(Debug, Error)]
|
||||
pub enum ReadError {
|
||||
/// An I/O error, displayed transparently; `#[from]` lets `?` convert a
/// `std::io::Error` into this variant.
#[error(transparent)]
|
||||
Io(#[from] std::io::Error),
|
||||
/// An error from the `zip` crate, displayed transparently; `#[from]` lets
/// `?` convert a `ZipError` into this variant.
#[error(transparent)]
|
||||
Zip(#[from] ZipError),
|
||||
/// Returned by `LogFile::open` when the archive has more than one entry.
#[error("archive contains multiple files")]
|
||||
MultipleFiles,
|
||||
/// Returned by `LogFile::open` when the archive has no entries.
#[error("archive contains no files")]
|
||||
NoFiles,
|
||||
}
|
||||
40
src/logfile.rs
Normal file
40
src/logfile.rs
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
use crate::error::ReadError;
|
||||
use itertools::Either;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use zip::ZipArchive;
|
||||
|
||||
/// A log source that is either a plain file or a zip archive.
pub enum LogFile {
|
||||
/// Buffered reader over a regular file.
Plain(BufReader<File>),
|
||||
/// File-backed zip archive; `LogFile::open` only builds this variant for
/// archives with exactly one entry, and `iter` reads entry 0.
Zip(ZipArchive<File>),
|
||||
}
|
||||
|
||||
impl LogFile {
|
||||
pub fn open(path: &str) -> Result<LogFile, ReadError> {
|
||||
let file = File::open(path)?;
|
||||
if path.ends_with(".zip") {
|
||||
let mut zip = ZipArchive::new(file)?;
|
||||
if zip.len() > 1 {
|
||||
return Err(ReadError::MultipleFiles);
|
||||
} else if zip.is_empty() {
|
||||
return Err(ReadError::NoFiles);
|
||||
}
|
||||
// ensure we can open the file
|
||||
let _ = zip.by_index(0)?;
|
||||
|
||||
Ok(LogFile::Zip(zip))
|
||||
} else {
|
||||
Ok(LogFile::Plain(BufReader::new(file)))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn iter(&mut self) -> impl Iterator<Item = String> + '_ {
|
||||
match self {
|
||||
LogFile::Plain(file) => Either::Left(file.lines().flatten()),
|
||||
LogFile::Zip(zip) => {
|
||||
let file = zip.by_index(0).expect("failed to open zip content again");
|
||||
Either::Right(BufReader::new(file).lines().flatten())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
20
src/logline.rs
Normal file
20
src/logline.rs
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
use serde::Deserialize;
|
||||
use std::borrow::Cow;
|
||||
|
||||
#[derive(Deserialize)]
|
||||
pub struct LogLine<'a> {
|
||||
pub version: &'a str,
|
||||
pub level: i64,
|
||||
pub message: Cow<'a, str>,
|
||||
}
|
||||
|
||||
impl LogLine<'_> {
|
||||
pub fn major_version(&self) -> Option<u32> {
|
||||
let major = self
|
||||
.version
|
||||
.split_once('.')
|
||||
.map(|(major, _)| major)
|
||||
.unwrap_or(self.version);
|
||||
major.parse().ok()
|
||||
}
|
||||
}
|
||||
136
src/main.rs
136
src/main.rs
|
|
@ -1,97 +1,81 @@
|
|||
use crate::error::LogError;
|
||||
use crate::logfile::LogFile;
|
||||
use crate::logline::LogLine;
|
||||
use crate::matcher::Matcher;
|
||||
use clap::Parser;
|
||||
use cloud_log_analyser_data::get_statements;
|
||||
use serde::Deserialize;
|
||||
use std::borrow::Cow;
|
||||
use cloud_log_analyser_data::{get_statements, MAX_VERSION};
|
||||
use main_error::MainResult;
|
||||
use std::collections::HashMap;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::iter::once;
|
||||
use std::ops::AddAssign;
|
||||
|
||||
mod error;
|
||||
mod logfile;
|
||||
mod logline;
|
||||
mod matcher;
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
enum Args {
|
||||
Log(LogCommand),
|
||||
File(FileCommand),
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
struct LogCommand {
|
||||
line: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
struct FileCommand {
|
||||
struct Args {
|
||||
file: String,
|
||||
}
|
||||
|
||||
#[derive(Deserialize)]
|
||||
struct LogLine<'a> {
|
||||
version: &'a str,
|
||||
level: i64,
|
||||
message: Cow<'a, str>,
|
||||
}
|
||||
|
||||
fn main() {
|
||||
fn main() -> MainResult {
|
||||
let args = Args::parse();
|
||||
|
||||
match args {
|
||||
Args::Log(LogCommand { line }) => {
|
||||
let parsed_line: LogLine = serde_json::from_str(&line).unwrap();
|
||||
let major = parsed_line.version.split(".").next().unwrap();
|
||||
let major = major.parse().unwrap();
|
||||
let statements = get_statements("server", major);
|
||||
let matcher = Matcher::new(statements);
|
||||
let index = matcher.match_log(parsed_line.level.into(), parsed_line.message.as_ref());
|
||||
if let Some(index) = index {
|
||||
let statement = &statements[index];
|
||||
println!("match found: {} line {}", statement.path, statement.line);
|
||||
} else {
|
||||
eprintln!("No match found");
|
||||
}
|
||||
}
|
||||
Args::File(FileCommand { file }) => {
|
||||
let file = BufReader::new(File::open(file).unwrap());
|
||||
let mut counts: HashMap<usize, usize> = HashMap::default();
|
||||
let mut lines = file.lines().flatten();
|
||||
let first = lines.next().unwrap();
|
||||
let first_parsed: LogLine = serde_json::from_str(&first).unwrap();
|
||||
let mut log_file = LogFile::open(&args.file).map_err(|err| LogError::Read {
|
||||
err,
|
||||
path: args.file,
|
||||
})?;
|
||||
let mut lines = log_file.iter();
|
||||
|
||||
let major = first_parsed.version.split(".").next().unwrap();
|
||||
let major = major.parse().unwrap();
|
||||
let statements = get_statements("server", major);
|
||||
let matcher = Matcher::new(statements);
|
||||
let mut counts: HashMap<usize, usize> = HashMap::new();
|
||||
let first = lines.next().unwrap();
|
||||
let first_parsed: LogLine = serde_json::from_str(&first).unwrap();
|
||||
|
||||
let lines = once(first).chain(lines);
|
||||
let mut error_count = 0;
|
||||
for line in lines {
|
||||
if line.starts_with('{') {
|
||||
let parsed = match serde_json::from_str::<LogLine>(&line) {
|
||||
Ok(parsed) => parsed,
|
||||
Err(_) => {
|
||||
error_count += 1;
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Some(index) =
|
||||
matcher.match_log(parsed.level.into(), parsed.message.as_ref())
|
||||
{
|
||||
counts.entry(index).or_default().add_assign(1);
|
||||
}
|
||||
let statements = get_statements(
|
||||
"server",
|
||||
first_parsed.major_version().unwrap_or(MAX_VERSION),
|
||||
);
|
||||
let matcher = Matcher::new(statements);
|
||||
|
||||
let lines = once(first).chain(lines);
|
||||
let mut error_count = 0;
|
||||
let mut unmatched = 0;
|
||||
for line in lines {
|
||||
if line.starts_with('{') {
|
||||
let parsed = match serde_json::from_str::<LogLine>(&line) {
|
||||
Ok(parsed) => parsed,
|
||||
Err(_) => {
|
||||
error_count += 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let mut counts: Vec<(_, _)> = counts.into_iter().collect();
|
||||
counts.sort_by_key(|(_, count)| *count);
|
||||
counts.reverse();
|
||||
for (index, count) in counts {
|
||||
let statement = &statements[index];
|
||||
println!("{} line {}: {}", statement.path, statement.line, count);
|
||||
}
|
||||
if error_count > 0 {
|
||||
eprintln!("{error_count} lines failed to parse as valid log json");
|
||||
};
|
||||
if let Some(index) = matcher.match_log(parsed.level.into(), parsed.message.as_ref()) {
|
||||
counts.entry(index).or_default().add_assign(1);
|
||||
} else {
|
||||
unmatched += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut counts: Vec<(_, _)> = counts.into_iter().collect();
|
||||
counts.sort_by_key(|(_, count)| *count);
|
||||
counts.reverse();
|
||||
for (index, count) in counts {
|
||||
let statement = &statements[index];
|
||||
println!(
|
||||
"{}: {} line {}: {}",
|
||||
statement.message(),
|
||||
statement.path,
|
||||
statement.line,
|
||||
count
|
||||
);
|
||||
}
|
||||
if unmatched > 0 {
|
||||
eprintln!("{unmatched} lines couldn't be matched");
|
||||
}
|
||||
if error_count > 0 {
|
||||
eprintln!("{error_count} lines failed to parse as valid log json");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,8 @@ pub struct LogMatch {
|
|||
level: LogLevel,
|
||||
pattern: Regex,
|
||||
pattern_length: usize,
|
||||
has_meaningful_message: bool,
|
||||
exception: Option<&'static str>,
|
||||
}
|
||||
|
||||
impl LogMatch {
|
||||
|
|
@ -13,6 +15,8 @@ impl LogMatch {
|
|||
level: statement.level,
|
||||
pattern: Regex::new(statement.regex).unwrap(),
|
||||
pattern_length: statement.regex.len(),
|
||||
has_meaningful_message: statement.has_meaningful_message,
|
||||
exception: statement.exception,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -33,14 +37,16 @@ impl Matcher {
|
|||
let mut best_length = 0;
|
||||
|
||||
for (i, log_match) in self.matches.iter().enumerate() {
|
||||
if (log_match.level == level
|
||||
|| log_match.level == LogLevel::Exception
|
||||
|| level == LogLevel::Unknown)
|
||||
&& log_match.pattern.is_match(message)
|
||||
&& log_match.pattern_length > best_length
|
||||
{
|
||||
best_match = Some(i);
|
||||
best_length = log_match.pattern_length;
|
||||
if log_match.has_meaningful_message {
|
||||
if (log_match.level == level
|
||||
|| log_match.level == LogLevel::Exception
|
||||
|| level == LogLevel::Unknown)
|
||||
&& log_match.pattern.is_match(message)
|
||||
&& log_match.pattern_length > best_length
|
||||
{
|
||||
best_match = Some(i);
|
||||
best_length = log_match.pattern_length;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue