mirror of
https://codeberg.org/icewind/logsmash.git
synced 2026-06-03 18:14:11 +02:00
initial log extraction logic
This commit is contained in:
commit
572582517c
18 changed files with 2827 additions and 0 deletions
12
logging-extractor/src/error.rs
Normal file
12
logging-extractor/src/error.rs
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
use std::path::PathBuf;
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
pub enum Error {
|
||||
#[error("Failed to determine absolute root path ({}: {err:#}", path.display())]
|
||||
RealPath { path: PathBuf, err: std::io::Error },
|
||||
#[error("Failed to open source file ({}: {err:#}", path.display())]
|
||||
Open { path: PathBuf, err: std::io::Error },
|
||||
#[error("Failed to read source file ({}: {err:#}", path.display())]
|
||||
Read { path: PathBuf, err: std::io::Error },
|
||||
}
|
||||
141
logging-extractor/src/extractor.rs
Normal file
141
logging-extractor/src/extractor.rs
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
use crate::{LogLevel, LoggingStatement};
|
||||
use tree_sitter::{Language, Node, Parser, Query, QueryCursor};
|
||||
|
||||
pub struct LogExtractor {
|
||||
language: Language,
|
||||
method_query: Query,
|
||||
string_query: Query,
|
||||
}
|
||||
|
||||
impl LogExtractor {
|
||||
pub fn new() -> Self {
|
||||
let language = tree_sitter_php::language_php();
|
||||
let method_query = Query::new(
|
||||
&language,
|
||||
r#"(
|
||||
member_call_expression
|
||||
name: (name)@name
|
||||
arguments: (arguments ((argument)+ @args))
|
||||
)"#,
|
||||
)
|
||||
.expect("invalid query");
|
||||
let string_query =
|
||||
Query::new(&language, r#"(string_content)@string"#).expect("invalid query");
|
||||
LogExtractor {
|
||||
language,
|
||||
method_query,
|
||||
string_query,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn extract<'a>(
|
||||
&self,
|
||||
path: &'a str,
|
||||
code: &'a str,
|
||||
) -> impl Iterator<Item = LoggingStatement<'a>> + 'a {
|
||||
let mut parser = Parser::new();
|
||||
|
||||
parser
|
||||
.set_language(&self.language)
|
||||
.expect("Error loading PHP grammar");
|
||||
parser.set_timeout_micros(10 * 1000 * 1000);
|
||||
|
||||
let tree = parser.parse(code, None).expect("parse timeout or canceled");
|
||||
|
||||
let mut log_call_cursor = QueryCursor::new();
|
||||
let log_calls = self.get_log_calls(&mut log_call_cursor, code, tree.root_node());
|
||||
log_calls
|
||||
.map(|call| {
|
||||
let mut string_cursor = QueryCursor::new();
|
||||
let message_parts = string_cursor
|
||||
.matches(&self.string_query, call.arguments, code.as_bytes())
|
||||
.map(|result| {
|
||||
result.captures[0]
|
||||
.node
|
||||
.utf8_text(code.as_bytes())
|
||||
.unwrap_or("malformed utf8")
|
||||
})
|
||||
.collect();
|
||||
|
||||
LoggingStatement {
|
||||
level: call.level,
|
||||
line: call.line + 1,
|
||||
path,
|
||||
message_parts,
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.into_iter()
|
||||
}
|
||||
|
||||
fn get_log_calls<'a>(
|
||||
&'a self,
|
||||
cursor: &'a mut QueryCursor,
|
||||
code: &'a str,
|
||||
node: Node<'a>,
|
||||
) -> impl Iterator<Item = LogCall> + 'a {
|
||||
let method_calls = cursor.matches(&self.method_query, node, code.as_bytes());
|
||||
|
||||
method_calls.filter_map(|method_call| {
|
||||
let name = method_call.captures[0]
|
||||
.node
|
||||
.utf8_text(code.as_bytes())
|
||||
.unwrap_or("malformed utf8");
|
||||
let level = LogLevel::parse(name)?;
|
||||
let line = method_call.captures[0].node.start_position().row;
|
||||
let arguments = method_call.captures[1].node;
|
||||
Some(LogCall {
|
||||
level,
|
||||
line,
|
||||
arguments,
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for LogExtractor {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
struct LogCall<'tree> {
|
||||
level: LogLevel,
|
||||
line: usize,
|
||||
arguments: Node<'tree>,
|
||||
}
|
||||
|
||||
/// Checks level, line number and message fragments for two simple logger calls.
#[test]
fn test_extract_logging() {
    let source = r#"<?php
function test() {
$this->logger->warning("failed to find trash item for $rootTrashedItemName deleted at $rootTrashedItemDate in folder $groupFolderId", ['app' => 'groupfolders']);
$logger->info("foobar");
}
?>
"#;
    let statements: Vec<_> = LogExtractor::new().extract("foo.php", source).collect();

    // Interpolated variables are not part of the extracted message fragments.
    let expected_warning = LoggingStatement {
        path: "foo.php",
        line: 3,
        level: LogLevel::Warn,
        message_parts: vec![
            "failed to find trash item for ",
            " deleted at ",
            " in folder ",
        ],
    };
    assert_eq!(statements[0], expected_warning);

    let expected_info = LoggingStatement {
        path: "foo.php",
        line: 4,
        level: LogLevel::Info,
        message_parts: vec!["foobar"],
    };
    assert_eq!(statements[1], expected_info);
}
|
||||
72
logging-extractor/src/level.rs
Normal file
72
logging-extractor/src/level.rs
Normal file
|
|
@ -0,0 +1,72 @@
|
|||
use std::fmt::{Display, Formatter};
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
/// Severity of a logging call.
///
/// The type is a plain field-less enum, so it is cheap to copy and can be used as
/// a key in maps and sets.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LogLevel {
    Debug,
    Info,
    Notice,
    Warn,
    Error,
    Alert,
    Critical,
    Emergency,
    /// No specific level is known; this is the default value.
    #[default]
    Unknown,
}
|
||||
|
||||
impl LogLevel {
|
||||
pub fn parse(name: &str) -> Option<Self> {
|
||||
match name {
|
||||
"debug" => Some(LogLevel::Debug),
|
||||
"info" => Some(LogLevel::Info),
|
||||
"notice" => Some(LogLevel::Notice),
|
||||
"warn" | "warning" => Some(LogLevel::Warn),
|
||||
"error" => Some(LogLevel::Error),
|
||||
"alert" => Some(LogLevel::Alert),
|
||||
"critical" => Some(LogLevel::Critical),
|
||||
"emergency" => Some(LogLevel::Emergency),
|
||||
"log" => Some(LogLevel::Unknown),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
LogLevel::Debug => "debug",
|
||||
LogLevel::Info => "info",
|
||||
LogLevel::Notice => "notice",
|
||||
LogLevel::Warn => "warn",
|
||||
LogLevel::Error => "error",
|
||||
LogLevel::Alert => "alert",
|
||||
LogLevel::Critical => "critical",
|
||||
LogLevel::Emergency => "emergency",
|
||||
LogLevel::Unknown => "log",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for LogLevel {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
self.as_str().serialize(serializer)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for LogLevel {
|
||||
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
||||
self.as_str().fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for LogLevel {
|
||||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>
|
||||
{
|
||||
let s = <&str>::deserialize(deserializer)?;
|
||||
Ok(LogLevel::parse(s).unwrap_or_default())
|
||||
}
|
||||
}
|
||||
62
logging-extractor/src/lib.rs
Normal file
62
logging-extractor/src/lib.rs
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
use crate::error::Error;
|
||||
use crate::extractor::LogExtractor;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fs::File;
|
||||
use std::io::{Read, Write};
|
||||
use walkdir::WalkDir;
|
||||
|
||||
pub mod error;
|
||||
pub mod extractor;
|
||||
mod level;
|
||||
|
||||
pub use level::LogLevel;
|
||||
|
||||
#[derive(Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct LoggingStatement<'a> {
|
||||
level: LogLevel,
|
||||
path: &'a str,
|
||||
line: usize,
|
||||
message_parts: Vec<&'a str>,
|
||||
}
|
||||
|
||||
pub fn extract_dir<W: Write>(root: &str, mut output: W) -> Result<(), Error> {
|
||||
let mut code_buff = String::with_capacity(32 * 1024 * 1024);
|
||||
|
||||
writeln!(&mut output, "[").ok();
|
||||
|
||||
let mut first_line = true;
|
||||
|
||||
let extractor = LogExtractor::new();
|
||||
|
||||
for file in WalkDir::new(root).into_iter().flatten() {
|
||||
let path = file.path();
|
||||
if let Some(path) = path.to_str() {
|
||||
if path.ends_with(".php") {
|
||||
code_buff.clear();
|
||||
|
||||
let rel_path = &path[root.len()..];
|
||||
|
||||
let mut fh = File::open(path).map_err(|err| Error::Open {
|
||||
path: path.into(),
|
||||
err,
|
||||
})?;
|
||||
fh.read_to_string(&mut code_buff)
|
||||
.map_err(|err| Error::Read {
|
||||
path: path.into(),
|
||||
err,
|
||||
})?;
|
||||
for log_item in extractor.extract(rel_path, &code_buff) {
|
||||
if !first_line {
|
||||
writeln!(&mut output, ",").ok();
|
||||
}
|
||||
first_line = false;
|
||||
let _ = serde_json::to_writer(&mut output, &log_item);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(&mut output, "\n]").ok();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
24
logging-extractor/src/main.rs
Normal file
24
logging-extractor/src/main.rs
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
use clap::Parser;
|
||||
use logging_extractor::error::Error;
|
||||
use logging_extractor::extract_dir;
|
||||
use std::fs::canonicalize;
|
||||
use std::io::stdout;
|
||||
use std::path::PathBuf;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
struct Args {
|
||||
root: PathBuf,
|
||||
}
|
||||
|
||||
fn main() -> Result<(), Error> {
|
||||
let args = Args::parse();
|
||||
let root = canonicalize(&args.root).map_err(|err| Error::RealPath {
|
||||
path: args.root,
|
||||
err,
|
||||
})?;
|
||||
let root = root.to_str().expect("non utf8 root path");
|
||||
|
||||
let stdout = stdout();
|
||||
|
||||
extract_dir(root, stdout)
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue