initial log extraction logic

This commit is contained in:
Robin Appelman 2024-07-20 16:13:58 +02:00
commit 572582517c
18 changed files with 2827 additions and 0 deletions

View file

@ -0,0 +1,12 @@
use std::path::PathBuf;
use thiserror::Error;
#[derive(Debug, Error)]
pub enum Error {
#[error("Failed to determine absolute root path ({}: {err:#}", path.display())]
RealPath { path: PathBuf, err: std::io::Error },
#[error("Failed to open source file ({}: {err:#}", path.display())]
Open { path: PathBuf, err: std::io::Error },
#[error("Failed to read source file ({}: {err:#}", path.display())]
Read { path: PathBuf, err: std::io::Error },
}

View file

@ -0,0 +1,141 @@
use crate::{LogLevel, LoggingStatement};
use tree_sitter::{Language, Node, Parser, Query, QueryCursor};
/// Finds logging calls in PHP source code using tree-sitter queries.
///
/// The queries are compiled once in `LogExtractor::new` and reused for every
/// call to `extract`.
pub struct LogExtractor {
// PHP grammar handed to each parser and used to compile both queries.
language: Language,
// Matches member call expressions, capturing the method name and its arguments.
method_query: Query,
// Matches `string_content` nodes, i.e. the literal pieces of a string.
string_query: Query,
}
impl LogExtractor {
    /// Builds an extractor for the PHP grammar and compiles its two queries.
    ///
    /// # Panics
    ///
    /// Panics with `"invalid query"` if either query fails to compile against
    /// the grammar.
    pub fn new() -> Self {
        let language = tree_sitter_php::language_php();

        // Member calls such as `$x->name(arg, ...)`: capture the method name
        // and the call arguments.
        let method_query = Query::new(
            &language,
            r#"(
member_call_expression
name: (name)@name
arguments: (arguments ((argument)+ @args))
)"#,
        )
        .expect("invalid query");

        // Literal fragments of a string.
        let string_query =
            Query::new(&language, r#"(string_content)@string"#).expect("invalid query");

        Self {
            language,
            method_query,
            string_query,
        }
    }

    /// Parses `code` as PHP and yields one [`LoggingStatement`] per logging call found.
    ///
    /// `path` is copied verbatim into every statement. Reported line numbers are
    /// 1-based. `message_parts` holds the text of every `string_content` node
    /// found below the call's captured argument node (capture index 1 —
    /// presumably the first argument; confirm against tree-sitter's capture order).
    ///
    /// All statements are gathered eagerly before the iterator is returned, so
    /// the result does not borrow the parse tree.
    ///
    /// # Panics
    ///
    /// Panics if the PHP grammar cannot be loaded into the parser, or if parsing
    /// yields no tree (a 10 second timeout is configured).
    pub fn extract<'a>(
        &self,
        path: &'a str,
        code: &'a str,
    ) -> impl Iterator<Item = LoggingStatement<'a>> + 'a {
        let source = code.as_bytes();

        let mut parser = Parser::new();
        parser
            .set_language(&self.language)
            .expect("Error loading PHP grammar");
        parser.set_timeout_micros(10 * 1000 * 1000);
        let tree = parser.parse(code, None).expect("parse timeout or canceled");

        let mut call_cursor = QueryCursor::new();
        let mut statements = Vec::new();
        for call in self.get_log_calls(&mut call_cursor, code, tree.root_node()) {
            let mut string_cursor = QueryCursor::new();
            let mut message_parts = Vec::new();
            for found in string_cursor.matches(&self.string_query, call.arguments, source) {
                let fragment = found.captures[0]
                    .node
                    .utf8_text(source)
                    .unwrap_or("malformed utf8");
                message_parts.push(fragment);
            }

            statements.push(LoggingStatement {
                level: call.level,
                // tree-sitter rows are 0-based.
                line: call.line + 1,
                path,
                message_parts,
            });
        }
        statements.into_iter()
    }

    /// Lazily yields every member call below `node` whose method name is
    /// recognised by [`LogLevel::parse`]; all other calls are skipped.
    fn get_log_calls<'a>(
        &'a self,
        cursor: &'a mut QueryCursor,
        code: &'a str,
        node: Node<'a>,
    ) -> impl Iterator<Item = LogCall> + 'a {
        let source = code.as_bytes();
        cursor
            .matches(&self.method_query, node, source)
            .filter_map(move |found| {
                let name_node = found.captures[0].node;
                let method_name = name_node.utf8_text(source).unwrap_or("malformed utf8");
                // The argument capture is only touched once the name is known
                // to be a logging method.
                LogLevel::parse(method_name).map(|level| LogCall {
                    level,
                    line: name_node.start_position().row,
                    arguments: found.captures[1].node,
                })
            })
    }
}
impl Default for LogExtractor {
fn default() -> Self {
Self::new()
}
}
// A logging method call located in the syntax tree, before its message
// strings have been collected.
struct LogCall<'tree> {
// Level derived from the called method's name.
level: LogLevel,
// 0-based row of the method name node.
line: usize,
// Captured argument node of the call; borrows from the parse tree.
arguments: Node<'tree>,
}
/// Two logging calls in a small PHP snippet are reported with the right
/// level, 1-based line and literal message fragments.
#[test]
fn test_extract_logging() {
    let source = r#"<?php
function test() {
$this->logger->warning("failed to find trash item for $rootTrashedItemName deleted at $rootTrashedItemDate in folder $groupFolderId", ['app' => 'groupfolders']);
$logger->info("foobar");
}
?>
"#;

    let extractor = LogExtractor::new();
    let statements: Vec<_> = extractor.extract("foo.php", source).collect();

    // Interpolated variables split the message into its literal fragments.
    let expected_warning = LoggingStatement {
        path: "foo.php",
        line: 3,
        level: LogLevel::Warn,
        message_parts: vec![
            "failed to find trash item for ",
            " deleted at ",
            " in folder ",
        ],
    };
    let expected_info = LoggingStatement {
        path: "foo.php",
        line: 4,
        level: LogLevel::Info,
        message_parts: vec!["foobar"],
    };

    assert_eq!(statements[0], expected_warning);
    assert_eq!(statements[1], expected_info);
}

View file

@ -0,0 +1,72 @@
use std::fmt::{Display, Formatter};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
/// Severity of a logging call.
///
/// The enum is field-less, so it is cheap to copy and can be compared, hashed
/// and used as a map key.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LogLevel {
    Debug,
    Info,
    Notice,
    Warn,
    Error,
    Alert,
    Critical,
    Emergency,
    /// Level could not be determined from the name alone; also the default.
    #[default]
    Unknown,
}
impl LogLevel {
pub fn parse(name: &str) -> Option<Self> {
match name {
"debug" => Some(LogLevel::Debug),
"info" => Some(LogLevel::Info),
"notice" => Some(LogLevel::Notice),
"warn" | "warning" => Some(LogLevel::Warn),
"error" => Some(LogLevel::Error),
"alert" => Some(LogLevel::Alert),
"critical" => Some(LogLevel::Critical),
"emergency" => Some(LogLevel::Emergency),
"log" => Some(LogLevel::Unknown),
_ => None,
}
}
pub fn as_str(&self) -> &'static str {
match self {
LogLevel::Debug => "debug",
LogLevel::Info => "info",
LogLevel::Notice => "notice",
LogLevel::Warn => "warn",
LogLevel::Error => "error",
LogLevel::Alert => "alert",
LogLevel::Critical => "critical",
LogLevel::Emergency => "emergency",
LogLevel::Unknown => "log",
}
}
}
/// A level serializes as the plain string returned by `LogLevel::as_str`.
impl Serialize for LogLevel {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let name = self.as_str();
        serializer.serialize_str(name)
    }
}
/// Displays the level by its `as_str` name, honouring width, fill, alignment
/// and precision flags the same way a plain `&str` does.
impl Display for LogLevel {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let name = self.as_str();
        f.pad(name)
    }
}
impl<'de> Deserialize<'de> for LogLevel {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>
{
let s = <&str>::deserialize(deserializer)?;
Ok(LogLevel::parse(s).unwrap_or_default())
}
}

View file

@ -0,0 +1,62 @@
use crate::error::Error;
use crate::extractor::LogExtractor;
use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io::{Read, Write};
use walkdir::WalkDir;
pub mod error;
pub mod extractor;
mod level;
pub use level::LogLevel;
/// A single logging call found in a source file.
///
/// The string fields are borrowed, so a statement cannot outlive the path and
/// source text it was extracted from.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct LoggingStatement<'a> {
// Severity derived from the name of the called method.
level: LogLevel,
// Path of the source file, exactly as it was passed to the extractor.
path: &'a str,
// 1-based line of the call.
line: usize,
// Literal fragments of the logged message, in source order.
message_parts: Vec<&'a str>,
}
pub fn extract_dir<W: Write>(root: &str, mut output: W) -> Result<(), Error> {
let mut code_buff = String::with_capacity(32 * 1024 * 1024);
writeln!(&mut output, "[").ok();
let mut first_line = true;
let extractor = LogExtractor::new();
for file in WalkDir::new(root).into_iter().flatten() {
let path = file.path();
if let Some(path) = path.to_str() {
if path.ends_with(".php") {
code_buff.clear();
let rel_path = &path[root.len()..];
let mut fh = File::open(path).map_err(|err| Error::Open {
path: path.into(),
err,
})?;
fh.read_to_string(&mut code_buff)
.map_err(|err| Error::Read {
path: path.into(),
err,
})?;
for log_item in extractor.extract(rel_path, &code_buff) {
if !first_line {
writeln!(&mut output, ",").ok();
}
first_line = false;
let _ = serde_json::to_writer(&mut output, &log_item);
}
}
}
}
writeln!(&mut output, "\n]").ok();
Ok(())
}

View file

@ -0,0 +1,24 @@
use clap::Parser;
use logging_extractor::error::Error;
use logging_extractor::extract_dir;
use std::fs::canonicalize;
use std::io::stdout;
use std::path::PathBuf;
// Command line arguments.
// NOTE: plain `//` comments on purpose; `///` doc comments on a clap `Parser`
// struct become part of the generated help output.
#[derive(Parser, Debug)]
struct Args {
// Directory to scan; resolved to an absolute path in `main`.
root: PathBuf,
}
fn main() -> Result<(), Error> {
let args = Args::parse();
let root = canonicalize(&args.root).map_err(|err| Error::RealPath {
path: args.root,
err,
})?;
let root = root.to_str().expect("non utf8 root path");
let stdout = stdout();
extract_dir(root, stdout)
}