improve matching

This commit is contained in:
Robin Appelman 2024-07-22 18:57:18 +02:00
commit 9413b216ba
24 changed files with 73837 additions and 21689 deletions

20
src/error.rs Normal file
View file

@ -0,0 +1,20 @@
use thiserror::Error;
use zip::result::ZipError;
/// Error type that `main` propagates with `?` when the input cannot be used.
#[derive(Debug, Error)]
pub enum LogError {
/// The input file at `path` could not be opened or read.
///
/// `err` carries the underlying [`ReadError`]; it is rendered with the
/// alternate (`{:#}`) `Display` form inside the message.
#[error("Error while reading input file '{path}': {err:#}")]
Read { err: ReadError, path: String },
}
/// Reasons why opening an input log file can fail.
#[derive(Debug, Error)]
pub enum ReadError {
/// An I/O error; created from `std::io::Error` via `From` (so `?` works) and
/// displayed exactly like the wrapped error.
#[error(transparent)]
Io(#[from] std::io::Error),
/// An error reported by the `zip` crate; created from `ZipError` via `From`
/// and displayed exactly like the wrapped error.
#[error(transparent)]
Zip(#[from] ZipError),
/// The zip archive holds more than one entry.
#[error("archive contains multiple files")]
MultipleFiles,
/// The zip archive holds no entries at all.
#[error("archive contains no files")]
NoFiles,
}

40
src/logfile.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::error::ReadError;
use itertools::Either;
use std::fs::File;
use std::io::{BufRead, BufReader};
use zip::ZipArchive;
/// An opened log input, either a plain file or a zip archive.
pub enum LogFile {
/// A regular file, wrapped in a buffered reader.
Plain(BufReader<File>),
/// A zip archive; `LogFile::open` only produces this variant for archives
/// that contain exactly one entry.
Zip(ZipArchive<File>),
}
impl LogFile {
    /// Opens the log input at `path`.
    ///
    /// A path ending in `.zip` is opened as a zip archive that must contain
    /// exactly one entry; every other path is opened as a plain file.
    ///
    /// # Errors
    ///
    /// - [`ReadError::Io`] if the file cannot be opened.
    /// - [`ReadError::Zip`] if the archive or its single entry cannot be read.
    /// - [`ReadError::MultipleFiles`] / [`ReadError::NoFiles`] if the archive
    ///   does not contain exactly one entry.
    pub fn open(path: &str) -> Result<LogFile, ReadError> {
        let file = File::open(path)?;
        if path.ends_with(".zip") {
            let mut zip = ZipArchive::new(file)?;
            if zip.len() > 1 {
                return Err(ReadError::MultipleFiles);
            } else if zip.is_empty() {
                return Err(ReadError::NoFiles);
            }
            // ensure we can open the file
            let _ = zip.by_index(0)?;
            Ok(LogFile::Zip(zip))
        } else {
            Ok(LogFile::Plain(BufReader::new(file)))
        }
    }

    /// Returns an iterator over the lines of the log.
    ///
    /// Lines that are not valid UTF-8 are skipped. Iteration ends at the end of
    /// the input or at the first I/O error.
    ///
    /// # Panics
    ///
    /// Panics if the zip entry that was opened successfully in
    /// [`LogFile::open`] cannot be opened again.
    pub fn iter(&mut self) -> impl Iterator<Item = String> + '_ {
        match self {
            LogFile::Plain(file) => Either::Left(readable_lines(file)),
            LogFile::Zip(zip) => {
                let file = zip.by_index(0).expect("failed to open zip content again");
                Either::Right(readable_lines(BufReader::new(file)))
            }
        }
    }
}

/// Yields the lines of `reader` with the trailing `\n` / `\r\n` removed.
///
/// `BufRead::lines().flatten()` keeps polling a reader that returns an error on
/// every read (for example when the path is a directory) and so never
/// terminates. This helper stops at the first I/O error instead, while still
/// skipping individual lines that are not valid UTF-8.
fn readable_lines<R: BufRead>(mut reader: R) -> impl Iterator<Item = String> {
    std::iter::from_fn(move || loop {
        let mut raw = Vec::new();
        match reader.read_until(b'\n', &mut raw) {
            // End of input, or a read error that would otherwise repeat forever.
            Ok(0) | Err(_) => return None,
            Ok(_) => {}
        }
        // Strip the line terminator the same way `BufRead::lines` does.
        if raw.last() == Some(&b'\n') {
            raw.pop();
            if raw.last() == Some(&b'\r') {
                raw.pop();
            }
        }
        // Invalid UTF-8 only affects this one line; move on to the next.
        if let Ok(line) = String::from_utf8(raw) {
            return Some(line);
        }
    })
    .fuse()
}

20
src/logline.rs Normal file
View file

@ -0,0 +1,20 @@
use serde::Deserialize;
use std::borrow::Cow;
#[derive(Deserialize)]
pub struct LogLine<'a> {
pub version: &'a str,
pub level: i64,
pub message: Cow<'a, str>,
}
impl LogLine<'_> {
    /// Parses the part of `version` before the first `.` as a `u32`.
    ///
    /// If `version` contains no `.`, the whole string is parsed. Returns `None`
    /// when that text is not a valid `u32`.
    pub fn major_version(&self) -> Option<u32> {
        let leading = match self.version.find('.') {
            Some(dot) => &self.version[..dot],
            None => self.version,
        };
        leading.parse::<u32>().ok()
    }
}

View file

@ -1,97 +1,81 @@
use crate::error::LogError;
use crate::logfile::LogFile;
use crate::logline::LogLine;
use crate::matcher::Matcher;
use clap::Parser;
use cloud_log_analyser_data::get_statements;
use serde::Deserialize;
use std::borrow::Cow;
use cloud_log_analyser_data::{get_statements, MAX_VERSION};
use main_error::MainResult;
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::iter::once;
use std::ops::AddAssign;
mod error;
mod logfile;
mod logline;
mod matcher;
#[derive(Debug, Parser)]
enum Args {
Log(LogCommand),
File(FileCommand),
}
#[derive(Debug, Parser)]
struct LogCommand {
line: String,
}
#[derive(Debug, Parser)]
struct FileCommand {
struct Args {
file: String,
}
#[derive(Deserialize)]
struct LogLine<'a> {
version: &'a str,
level: i64,
message: Cow<'a, str>,
}
fn main() {
fn main() -> MainResult {
let args = Args::parse();
match args {
Args::Log(LogCommand { line }) => {
let parsed_line: LogLine = serde_json::from_str(&line).unwrap();
let major = parsed_line.version.split(".").next().unwrap();
let major = major.parse().unwrap();
let statements = get_statements("server", major);
let matcher = Matcher::new(statements);
let index = matcher.match_log(parsed_line.level.into(), parsed_line.message.as_ref());
if let Some(index) = index {
let statement = &statements[index];
println!("match found: {} line {}", statement.path, statement.line);
} else {
eprintln!("No match found");
}
}
Args::File(FileCommand { file }) => {
let file = BufReader::new(File::open(file).unwrap());
let mut counts: HashMap<usize, usize> = HashMap::default();
let mut lines = file.lines().flatten();
let first = lines.next().unwrap();
let first_parsed: LogLine = serde_json::from_str(&first).unwrap();
let mut log_file = LogFile::open(&args.file).map_err(|err| LogError::Read {
err,
path: args.file,
})?;
let mut lines = log_file.iter();
let major = first_parsed.version.split(".").next().unwrap();
let major = major.parse().unwrap();
let statements = get_statements("server", major);
let matcher = Matcher::new(statements);
let mut counts: HashMap<usize, usize> = HashMap::new();
let first = lines.next().unwrap();
let first_parsed: LogLine = serde_json::from_str(&first).unwrap();
let lines = once(first).chain(lines);
let mut error_count = 0;
for line in lines {
if line.starts_with('{') {
let parsed = match serde_json::from_str::<LogLine>(&line) {
Ok(parsed) => parsed,
Err(_) => {
error_count += 1;
continue;
}
};
if let Some(index) =
matcher.match_log(parsed.level.into(), parsed.message.as_ref())
{
counts.entry(index).or_default().add_assign(1);
}
let statements = get_statements(
"server",
first_parsed.major_version().unwrap_or(MAX_VERSION),
);
let matcher = Matcher::new(statements);
let lines = once(first).chain(lines);
let mut error_count = 0;
let mut unmatched = 0;
for line in lines {
if line.starts_with('{') {
let parsed = match serde_json::from_str::<LogLine>(&line) {
Ok(parsed) => parsed,
Err(_) => {
error_count += 1;
continue;
}
}
let mut counts: Vec<(_, _)> = counts.into_iter().collect();
counts.sort_by_key(|(_, count)| *count);
counts.reverse();
for (index, count) in counts {
let statement = &statements[index];
println!("{} line {}: {}", statement.path, statement.line, count);
}
if error_count > 0 {
eprintln!("{error_count} lines failed to parse as valid log json");
};
if let Some(index) = matcher.match_log(parsed.level.into(), parsed.message.as_ref()) {
counts.entry(index).or_default().add_assign(1);
} else {
unmatched += 1;
}
}
}
let mut counts: Vec<(_, _)> = counts.into_iter().collect();
counts.sort_by_key(|(_, count)| *count);
counts.reverse();
for (index, count) in counts {
let statement = &statements[index];
println!(
"{}: {} line {}: {}",
statement.message(),
statement.path,
statement.line,
count
);
}
if unmatched > 0 {
eprintln!("{unmatched} lines couldn't be matched");
}
if error_count > 0 {
eprintln!("{error_count} lines failed to parse as valid log json");
}
Ok(())
}

View file

@ -5,6 +5,8 @@ pub struct LogMatch {
level: LogLevel,
pattern: Regex,
pattern_length: usize,
has_meaningful_message: bool,
exception: Option<&'static str>,
}
impl LogMatch {
@ -13,6 +15,8 @@ impl LogMatch {
level: statement.level,
pattern: Regex::new(statement.regex).unwrap(),
pattern_length: statement.regex.len(),
has_meaningful_message: statement.has_meaningful_message,
exception: statement.exception,
}
}
}
@ -33,14 +37,16 @@ impl Matcher {
let mut best_length = 0;
for (i, log_match) in self.matches.iter().enumerate() {
if (log_match.level == level
|| log_match.level == LogLevel::Exception
|| level == LogLevel::Unknown)
&& log_match.pattern.is_match(message)
&& log_match.pattern_length > best_length
{
best_match = Some(i);
best_length = log_match.pattern_length;
if log_match.has_meaningful_message {
if (log_match.level == level
|| log_match.level == LogLevel::Exception
|| level == LogLevel::Unknown)
&& log_match.pattern.is_match(message)
&& log_match.pattern_length > best_length
{
best_match = Some(i);
best_length = log_match.pattern_length;
}
}
}