mirror of
https://codeberg.org/icewind/logsmash.git
synced 2026-06-03 18:14:11 +02:00
generate better regexes and capture placeholder names
This commit is contained in:
parent
19c1c57acc
commit
04e391aea1
15 changed files with 21695 additions and 65312 deletions
|
|
@ -1,5 +1,4 @@
|
|||
use databake::Bake;
|
||||
use std::borrow::Cow;
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Clone, Copy, Bake)]
|
||||
#[databake(path = crate)]
|
||||
|
|
@ -40,34 +39,43 @@ pub struct LoggingStatement<'a> {
|
|||
pub level: LogLevel,
|
||||
pub path: &'a str,
|
||||
pub line: usize,
|
||||
pub message_parts: &'a [&'a str],
|
||||
pub placeholders: &'a [&'a str],
|
||||
pub regex: &'a str,
|
||||
}
|
||||
|
||||
fn build_pattern<'a>(parts: &[crate::MessagePart]) -> String {
|
||||
let mut pattern = String::with_capacity(128);
|
||||
pattern.push('^');
|
||||
for part in parts {
|
||||
match part {
|
||||
crate::MessagePart::Literal(literal) => {
|
||||
pattern.push_str(®ex_syntax::escape(literal))
|
||||
}
|
||||
crate::MessagePart::PlaceHolder(_placeholder) => {
|
||||
pattern.push_str("(.*)");
|
||||
}
|
||||
}
|
||||
}
|
||||
pattern.push('$');
|
||||
pattern
|
||||
}
|
||||
|
||||
pub fn bake_statement(output: &mut String, statement: &crate::LoggingStatement) {
|
||||
let message_parts: Vec<_> = statement.message_parts.iter().map(Cow::as_ref).collect();
|
||||
let placeholders: Vec<_> = statement
|
||||
.message_parts
|
||||
.iter()
|
||||
.filter_map(|part| match part {
|
||||
crate::MessagePart::PlaceHolder(placeholder) => Some(placeholder.as_str()),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
let pattern = build_pattern(&statement.message_parts);
|
||||
let statement = LoggingStatement {
|
||||
level: statement.level.into(),
|
||||
path: statement.path,
|
||||
line: statement.line,
|
||||
message_parts: &message_parts,
|
||||
placeholders: &placeholders,
|
||||
regex: &pattern,
|
||||
};
|
||||
output.push_str(&statement.bake(&Default::default()).to_string());
|
||||
}
|
||||
|
||||
#[cfg(feature = "bake")]
|
||||
mod bake_test {
|
||||
#[test]
|
||||
fn test_bake() {
|
||||
use databake::test_bake;
|
||||
test_bake!(
|
||||
crate::LoggingStatement,
|
||||
const: crate::LoggingStatement {
|
||||
level: crate::LogLevel::Debug,
|
||||
path: "foo",
|
||||
line: 12usize,
|
||||
message_parts: &["part1", "part2"]
|
||||
},
|
||||
cloud_log_analyser,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,13 +1,11 @@
|
|||
use crate::string::{unescape, DoubleQuoteString, SingleQuoteString};
|
||||
use crate::{LogLevel, LoggingStatement};
|
||||
use std::borrow::Cow;
|
||||
use crate::{LogLevel, LoggingStatement, MessagePart};
|
||||
use tree_sitter::{Language, Node, Parser, Query, QueryCursor};
|
||||
|
||||
pub struct LogExtractor {
|
||||
language: Language,
|
||||
method_query: Query,
|
||||
throw_query: Query,
|
||||
string_query: Query,
|
||||
}
|
||||
|
||||
impl LogExtractor {
|
||||
|
|
@ -30,13 +28,10 @@ impl LogExtractor {
|
|||
)"#,
|
||||
)
|
||||
.expect("invalid query");
|
||||
let string_query = Query::new(&language, r#"[(string_content)(escape_sequence)]@string"#)
|
||||
.expect("invalid query");
|
||||
LogExtractor {
|
||||
language,
|
||||
method_query,
|
||||
throw_query,
|
||||
string_query,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -56,41 +51,53 @@ impl LogExtractor {
|
|||
|
||||
let mut log_call_cursor = QueryCursor::new();
|
||||
let mut throw_call_cursor = QueryCursor::new();
|
||||
let mut tree_cursor = tree.walk();
|
||||
let log_calls = self.get_log_calls(&mut log_call_cursor, code, tree.root_node());
|
||||
let throw_calls = self.get_throw_calls(&mut throw_call_cursor, code, tree.root_node());
|
||||
let mut all = log_calls
|
||||
.chain(throw_calls)
|
||||
.map(|call| {
|
||||
let mut string_cursor = QueryCursor::new();
|
||||
let message_parts = string_cursor
|
||||
.matches(&self.string_query, call.arguments, code.as_bytes())
|
||||
.map(|result| {
|
||||
let node = result.captures[0].node;
|
||||
let raw = node.utf8_text(code.as_bytes()).unwrap_or("malformed utf8");
|
||||
.filter_map(|call| {
|
||||
let argument = call.arguments.child(0)?;
|
||||
if argument.grammar_name() != "string"
|
||||
&& argument.grammar_name() != "encapsed_string"
|
||||
{
|
||||
return None;
|
||||
}
|
||||
let mut argument_string_parts = argument.children(&mut tree_cursor);
|
||||
let is_double_quote = argument_string_parts.next()?.grammar_name() == r#"""#;
|
||||
let mut message_builder =
|
||||
MessageBuilder::with_capacity(argument_string_parts.len());
|
||||
|
||||
if raw.contains('\\') {
|
||||
let start_char =
|
||||
code.as_bytes()[node.parent().unwrap().byte_range().start];
|
||||
Cow::Owned(
|
||||
if start_char == b'"' {
|
||||
unescape::<DoubleQuoteString>(raw)
|
||||
} else {
|
||||
unescape::<SingleQuoteString>(raw)
|
||||
}
|
||||
.unwrap(),
|
||||
)
|
||||
} else {
|
||||
Cow::Borrowed(raw)
|
||||
for string_part in argument_string_parts {
|
||||
match string_part.grammar_name() {
|
||||
"string_content" => {
|
||||
let content = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
message_builder.push_literal(content);
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
"escape_sequence" => {
|
||||
let raw = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
let content = if is_double_quote {
|
||||
unescape::<DoubleQuoteString>(raw)
|
||||
} else {
|
||||
unescape::<SingleQuoteString>(raw)
|
||||
}
|
||||
.unwrap();
|
||||
message_builder.push_literal(&content);
|
||||
}
|
||||
r#"'"# | r#"""# | r#"{"# | r#"}"# => {}
|
||||
_ => {
|
||||
let placeholder = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
message_builder.push_placeholder(placeholder);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LoggingStatement {
|
||||
Some(LoggingStatement {
|
||||
level: call.level,
|
||||
line: call.line + 1,
|
||||
path,
|
||||
message_parts,
|
||||
}
|
||||
message_parts: message_builder.0,
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
|
|
@ -155,13 +162,33 @@ struct LogCall<'tree> {
|
|||
arguments: Node<'tree>,
|
||||
}
|
||||
|
||||
struct MessageBuilder(Vec<MessagePart>);
|
||||
|
||||
impl MessageBuilder {
|
||||
pub fn with_capacity(cap: usize) -> Self {
|
||||
MessageBuilder(Vec::with_capacity(cap))
|
||||
}
|
||||
|
||||
pub fn push_literal(&mut self, content: &str) {
|
||||
if let Some(MessagePart::Literal(last_part)) = self.0.last_mut() {
|
||||
last_part.push_str(content);
|
||||
} else {
|
||||
self.0.push(MessagePart::Literal(content.into()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_placeholder(&mut self, placeholder: &str) {
|
||||
self.0.push(MessagePart::PlaceHolder(placeholder.into()));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_logging() {
|
||||
let code = r#"<?php
|
||||
function test() {
|
||||
$this->logger->warning("failed to find trash item for $rootTrashedItemName deleted at $rootTrashedItemDate in folder $groupFolderId", ['app' => 'groupfolders']);
|
||||
$logger->info("foobar");
|
||||
throw new FooException("foo \"bar\" \' {$blarg}");
|
||||
$logger->info('foobar');
|
||||
throw new FooException("foo \"bar\" \' {$this->blarg}");
|
||||
}
|
||||
?>
|
||||
"#;
|
||||
|
|
@ -174,9 +201,12 @@ fn test_extract_logging() {
|
|||
line: 3,
|
||||
level: LogLevel::Warn,
|
||||
message_parts: vec![
|
||||
"failed to find trash item for ".into(),
|
||||
" deleted at ".into(),
|
||||
" in folder ".into()
|
||||
MessagePart::Literal("failed to find trash item for ".into()),
|
||||
MessagePart::PlaceHolder("$rootTrashedItemName".into()),
|
||||
MessagePart::Literal(" deleted at ".into()),
|
||||
MessagePart::PlaceHolder("$rootTrashedItemDate".into()),
|
||||
MessagePart::Literal(" in folder ".into()),
|
||||
MessagePart::PlaceHolder("$groupFolderId".into()),
|
||||
]
|
||||
}
|
||||
);
|
||||
|
|
@ -186,7 +216,7 @@ fn test_extract_logging() {
|
|||
path: "foo.php",
|
||||
line: 4,
|
||||
level: LogLevel::Info,
|
||||
message_parts: vec!["foobar".into()]
|
||||
message_parts: vec![MessagePart::Literal("foobar".into())]
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
|
|
@ -196,11 +226,8 @@ fn test_extract_logging() {
|
|||
line: 5,
|
||||
level: LogLevel::Exception,
|
||||
message_parts: vec![
|
||||
"foo ".into(),
|
||||
"\"".into(),
|
||||
"bar".into(),
|
||||
"\"".into(),
|
||||
" \\' ".into()
|
||||
MessagePart::Literal(r#"foo "bar" \' "#.into()),
|
||||
MessagePart::PlaceHolder("$this->blarg".into())
|
||||
]
|
||||
}
|
||||
);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
use crate::error::Error;
|
||||
use crate::extractor::LogExtractor;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::borrow::Cow;
|
||||
use std::fs::File;
|
||||
use std::io::{Read, Write};
|
||||
use tracing::error;
|
||||
|
|
@ -21,7 +20,13 @@ pub struct LoggingStatement<'a> {
|
|||
level: LogLevel,
|
||||
path: &'a str,
|
||||
line: usize,
|
||||
message_parts: Vec<Cow<'a, str>>,
|
||||
message_parts: Vec<MessagePart>,
|
||||
}
|
||||
|
||||
/// One segment of the message of an extracted logging statement.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub enum MessagePart {
|
||||
/// Fixed text taken from the message string.
Literal(String),
|
||||
/// Source text of an expression embedded in the message string, e.g. `$groupFolderId`.
PlaceHolder(String),
|
||||
}
|
||||
|
||||
pub fn extract_dir<W: Write>(root: &str, mut output: W, bake: bool) -> Result<(), Error> {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue