mirror of
https://codeberg.org/icewind/logsmash.git
synced 2026-06-03 18:14:11 +02:00
generate better regexes and capture placeholder names
This commit is contained in:
parent
19c1c57acc
commit
04e391aea1
15 changed files with 21695 additions and 65312 deletions
2
logging-extractor/Cargo.lock
generated
2
logging-extractor/Cargo.lock
generated
|
|
@ -115,6 +115,8 @@ dependencies = [
|
|||
"databake",
|
||||
"insta",
|
||||
"memchr",
|
||||
"regex",
|
||||
"regex-syntax",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"test-case",
|
||||
|
|
|
|||
|
|
@ -20,6 +20,8 @@ tree-sitter = "0.22.6"
|
|||
tree-sitter-php = "0.22.7"
|
||||
memchr = "2.7.4"
|
||||
databake = { version = "0.1.8", features = ["derive"] }
|
||||
regex-syntax = "0.8.4"
|
||||
regex = "1.10.5"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.1.6"
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
use databake::Bake;
|
||||
use std::borrow::Cow;
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Clone, Copy, Bake)]
|
||||
#[databake(path = crate)]
|
||||
|
|
@ -40,34 +39,43 @@ pub struct LoggingStatement<'a> {
|
|||
pub level: LogLevel,
|
||||
pub path: &'a str,
|
||||
pub line: usize,
|
||||
pub message_parts: &'a [&'a str],
|
||||
pub placeholders: &'a [&'a str],
|
||||
pub regex: &'a str,
|
||||
}
|
||||
|
||||
fn build_pattern<'a>(parts: &[crate::MessagePart]) -> String {
|
||||
let mut pattern = String::with_capacity(128);
|
||||
pattern.push('^');
|
||||
for part in parts {
|
||||
match part {
|
||||
crate::MessagePart::Literal(literal) => {
|
||||
pattern.push_str(®ex_syntax::escape(literal))
|
||||
}
|
||||
crate::MessagePart::PlaceHolder(_placeholder) => {
|
||||
pattern.push_str("(.*)");
|
||||
}
|
||||
}
|
||||
}
|
||||
pattern.push('$');
|
||||
pattern
|
||||
}
|
||||
|
||||
pub fn bake_statement(output: &mut String, statement: &crate::LoggingStatement) {
|
||||
let message_parts: Vec<_> = statement.message_parts.iter().map(Cow::as_ref).collect();
|
||||
let placeholders: Vec<_> = statement
|
||||
.message_parts
|
||||
.iter()
|
||||
.filter_map(|part| match part {
|
||||
crate::MessagePart::PlaceHolder(placeholder) => Some(placeholder.as_str()),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
let pattern = build_pattern(&statement.message_parts);
|
||||
let statement = LoggingStatement {
|
||||
level: statement.level.into(),
|
||||
path: statement.path,
|
||||
line: statement.line,
|
||||
message_parts: &message_parts,
|
||||
placeholders: &placeholders,
|
||||
regex: &pattern,
|
||||
};
|
||||
output.push_str(&statement.bake(&Default::default()).to_string());
|
||||
}
|
||||
|
||||
#[cfg(feature = "bake")]
|
||||
mod bake_test {
|
||||
#[test]
|
||||
fn test_bake() {
|
||||
use databake::test_bake;
|
||||
test_bake!(
|
||||
crate::LoggingStatement,
|
||||
const: crate::LoggingStatement {
|
||||
level: crate::LogLevel::Debug,
|
||||
path: "foo",
|
||||
line: 12usize,
|
||||
message_parts: &["part1", "part2"]
|
||||
},
|
||||
cloud_log_analyser,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,13 +1,11 @@
|
|||
use crate::string::{unescape, DoubleQuoteString, SingleQuoteString};
|
||||
use crate::{LogLevel, LoggingStatement};
|
||||
use std::borrow::Cow;
|
||||
use crate::{LogLevel, LoggingStatement, MessagePart};
|
||||
use tree_sitter::{Language, Node, Parser, Query, QueryCursor};
|
||||
|
||||
pub struct LogExtractor {
|
||||
language: Language,
|
||||
method_query: Query,
|
||||
throw_query: Query,
|
||||
string_query: Query,
|
||||
}
|
||||
|
||||
impl LogExtractor {
|
||||
|
|
@ -30,13 +28,10 @@ impl LogExtractor {
|
|||
)"#,
|
||||
)
|
||||
.expect("invalid query");
|
||||
let string_query = Query::new(&language, r#"[(string_content)(escape_sequence)]@string"#)
|
||||
.expect("invalid query");
|
||||
LogExtractor {
|
||||
language,
|
||||
method_query,
|
||||
throw_query,
|
||||
string_query,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -56,41 +51,53 @@ impl LogExtractor {
|
|||
|
||||
let mut log_call_cursor = QueryCursor::new();
|
||||
let mut throw_call_cursor = QueryCursor::new();
|
||||
let mut tree_cursor = tree.walk();
|
||||
let log_calls = self.get_log_calls(&mut log_call_cursor, code, tree.root_node());
|
||||
let throw_calls = self.get_throw_calls(&mut throw_call_cursor, code, tree.root_node());
|
||||
let mut all = log_calls
|
||||
.chain(throw_calls)
|
||||
.map(|call| {
|
||||
let mut string_cursor = QueryCursor::new();
|
||||
let message_parts = string_cursor
|
||||
.matches(&self.string_query, call.arguments, code.as_bytes())
|
||||
.map(|result| {
|
||||
let node = result.captures[0].node;
|
||||
let raw = node.utf8_text(code.as_bytes()).unwrap_or("malformed utf8");
|
||||
.filter_map(|call| {
|
||||
let argument = call.arguments.child(0)?;
|
||||
if argument.grammar_name() != "string"
|
||||
&& argument.grammar_name() != "encapsed_string"
|
||||
{
|
||||
return None;
|
||||
}
|
||||
let mut argument_string_parts = argument.children(&mut tree_cursor);
|
||||
let is_double_quote = argument_string_parts.next()?.grammar_name() == r#"""#;
|
||||
let mut message_builder =
|
||||
MessageBuilder::with_capacity(argument_string_parts.len());
|
||||
|
||||
if raw.contains('\\') {
|
||||
let start_char =
|
||||
code.as_bytes()[node.parent().unwrap().byte_range().start];
|
||||
Cow::Owned(
|
||||
if start_char == b'"' {
|
||||
unescape::<DoubleQuoteString>(raw)
|
||||
} else {
|
||||
unescape::<SingleQuoteString>(raw)
|
||||
}
|
||||
.unwrap(),
|
||||
)
|
||||
} else {
|
||||
Cow::Borrowed(raw)
|
||||
for string_part in argument_string_parts {
|
||||
match string_part.grammar_name() {
|
||||
"string_content" => {
|
||||
let content = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
message_builder.push_literal(content);
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
"escape_sequence" => {
|
||||
let raw = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
let content = if is_double_quote {
|
||||
unescape::<DoubleQuoteString>(raw)
|
||||
} else {
|
||||
unescape::<SingleQuoteString>(raw)
|
||||
}
|
||||
.unwrap();
|
||||
message_builder.push_literal(&content);
|
||||
}
|
||||
r#"'"# | r#"""# | r#"{"# | r#"}"# => {}
|
||||
_ => {
|
||||
let placeholder = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
message_builder.push_placeholder(placeholder);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LoggingStatement {
|
||||
Some(LoggingStatement {
|
||||
level: call.level,
|
||||
line: call.line + 1,
|
||||
path,
|
||||
message_parts,
|
||||
}
|
||||
message_parts: message_builder.0,
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
|
|
@ -155,13 +162,33 @@ struct LogCall<'tree> {
|
|||
arguments: Node<'tree>,
|
||||
}
|
||||
|
||||
struct MessageBuilder(Vec<MessagePart>);
|
||||
|
||||
impl MessageBuilder {
|
||||
pub fn with_capacity(cap: usize) -> Self {
|
||||
MessageBuilder(Vec::with_capacity(cap))
|
||||
}
|
||||
|
||||
pub fn push_literal(&mut self, content: &str) {
|
||||
if let Some(MessagePart::Literal(last_part)) = self.0.last_mut() {
|
||||
last_part.push_str(content);
|
||||
} else {
|
||||
self.0.push(MessagePart::Literal(content.into()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_placeholder(&mut self, placeholder: &str) {
|
||||
self.0.push(MessagePart::PlaceHolder(placeholder.into()));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_logging() {
|
||||
let code = r#"<?php
|
||||
function test() {
|
||||
$this->logger->warning("failed to find trash item for $rootTrashedItemName deleted at $rootTrashedItemDate in folder $groupFolderId", ['app' => 'groupfolders']);
|
||||
$logger->info("foobar");
|
||||
throw new FooException("foo \"bar\" \' {$blarg}");
|
||||
$logger->info('foobar');
|
||||
throw new FooException("foo \"bar\" \' {$this->blarg}");
|
||||
}
|
||||
?>
|
||||
"#;
|
||||
|
|
@ -174,9 +201,12 @@ fn test_extract_logging() {
|
|||
line: 3,
|
||||
level: LogLevel::Warn,
|
||||
message_parts: vec![
|
||||
"failed to find trash item for ".into(),
|
||||
" deleted at ".into(),
|
||||
" in folder ".into()
|
||||
MessagePart::Literal("failed to find trash item for ".into()),
|
||||
MessagePart::PlaceHolder("$rootTrashedItemName".into()),
|
||||
MessagePart::Literal(" deleted at ".into()),
|
||||
MessagePart::PlaceHolder("$rootTrashedItemDate".into()),
|
||||
MessagePart::Literal(" in folder ".into()),
|
||||
MessagePart::PlaceHolder("$groupFolderId".into()),
|
||||
]
|
||||
}
|
||||
);
|
||||
|
|
@ -186,7 +216,7 @@ fn test_extract_logging() {
|
|||
path: "foo.php",
|
||||
line: 4,
|
||||
level: LogLevel::Info,
|
||||
message_parts: vec!["foobar".into()]
|
||||
message_parts: vec![MessagePart::Literal("foobar".into())]
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
|
|
@ -196,11 +226,8 @@ fn test_extract_logging() {
|
|||
line: 5,
|
||||
level: LogLevel::Exception,
|
||||
message_parts: vec![
|
||||
"foo ".into(),
|
||||
"\"".into(),
|
||||
"bar".into(),
|
||||
"\"".into(),
|
||||
" \\' ".into()
|
||||
MessagePart::Literal(r#"foo "bar" \' "#.into()),
|
||||
MessagePart::PlaceHolder("$this->blarg".into())
|
||||
]
|
||||
}
|
||||
);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
use crate::error::Error;
|
||||
use crate::extractor::LogExtractor;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::borrow::Cow;
|
||||
use std::fs::File;
|
||||
use std::io::{Read, Write};
|
||||
use tracing::error;
|
||||
|
|
@ -21,7 +20,13 @@ pub struct LoggingStatement<'a> {
|
|||
level: LogLevel,
|
||||
path: &'a str,
|
||||
line: usize,
|
||||
message_parts: Vec<Cow<'a, str>>,
|
||||
message_parts: Vec<MessagePart>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub enum MessagePart {
|
||||
Literal(String),
|
||||
PlaceHolder(String),
|
||||
}
|
||||
|
||||
pub fn extract_dir<W: Write>(root: &str, mut output: W, bake: bool) -> Result<(), Error> {
|
||||
|
|
|
|||
|
|
@ -8,16 +8,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 129,
|
||||
"message_parts": [
|
||||
"invalid share type!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"level": "exception",
|
||||
"path": "/DefaultShareProvider.php",
|
||||
"line": 305,
|
||||
"message_parts": [
|
||||
"Group \"",
|
||||
"\" does not exist"
|
||||
{
|
||||
"Literal": "invalid share type!"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -25,7 +18,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 309,
|
||||
"message_parts": [
|
||||
"Recipient not in receiving group"
|
||||
{
|
||||
"Literal": "Recipient not in receiving group"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -33,7 +28,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 339,
|
||||
"message_parts": [
|
||||
"Recipient does not match"
|
||||
{
|
||||
"Literal": "Recipient does not match"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -41,16 +38,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 344,
|
||||
"message_parts": [
|
||||
"Invalid shareType"
|
||||
]
|
||||
},
|
||||
{
|
||||
"level": "exception",
|
||||
"path": "/DefaultShareProvider.php",
|
||||
"line": 431,
|
||||
"message_parts": [
|
||||
"Group \"",
|
||||
"\" does not exist"
|
||||
{
|
||||
"Literal": "Invalid shareType"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -58,7 +48,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 476,
|
||||
"message_parts": [
|
||||
"Recipient does not match"
|
||||
{
|
||||
"Literal": "Recipient does not match"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -66,7 +58,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 482,
|
||||
"message_parts": [
|
||||
"Invalid shareType"
|
||||
{
|
||||
"Literal": "Invalid shareType"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -74,7 +68,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 609,
|
||||
"message_parts": [
|
||||
"non-shallow getSharesInFolder is no longer supported"
|
||||
{
|
||||
"Literal": "non-shallow getSharesInFolder is no longer supported"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -82,41 +78,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 963,
|
||||
"message_parts": [
|
||||
"Invalid backend"
|
||||
]
|
||||
},
|
||||
{
|
||||
"level": "error",
|
||||
"path": "/DefaultShareProvider.php",
|
||||
"line": 1171,
|
||||
"message_parts": []
|
||||
},
|
||||
{
|
||||
"level": "debug",
|
||||
"path": "/DefaultShareProvider.php",
|
||||
"line": 1392,
|
||||
"message_parts": [
|
||||
"Share notification not sent to ",
|
||||
" because user could not be found."
|
||||
]
|
||||
},
|
||||
{
|
||||
"level": "debug",
|
||||
"path": "/DefaultShareProvider.php",
|
||||
"line": 1401,
|
||||
"message_parts": [
|
||||
"Share notification not sent to ",
|
||||
" because email address is not set."
|
||||
]
|
||||
},
|
||||
{
|
||||
"level": "debug",
|
||||
"path": "/DefaultShareProvider.php",
|
||||
"line": 1416,
|
||||
"message_parts": [
|
||||
"Sent share notification to ",
|
||||
" for share with ID ",
|
||||
"."
|
||||
{
|
||||
"Literal": "Invalid backend"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -124,16 +88,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 1420,
|
||||
"message_parts": [
|
||||
"Share notification mail could not be sent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"level": "error",
|
||||
"path": "/DefaultShareProvider.php",
|
||||
"line": 1507,
|
||||
"message_parts": [
|
||||
"Share notification mail could not be sent to: ",
|
||||
", "
|
||||
{
|
||||
"Literal": "Share notification mail could not be sent."
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue