mirror of
https://codeberg.org/icewind/logsmash.git
synced 2026-06-03 18:14:11 +02:00
generate better regexes and capture placeholder names
This commit is contained in:
parent
19c1c57acc
commit
04e391aea1
15 changed files with 21695 additions and 65312 deletions
7
README.md
Normal file
7
README.md
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# cloud-log-analyser
|
||||
|
||||
### Updating baked data
|
||||
|
||||
rm -r data/src/data
|
||||
nix build .#extracted-logs-rust
|
||||
cp -rL result data/src/data && chmod -R +w data/src/data
|
||||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -26,10 +26,11 @@ impl From<i64> for LogLevel {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
#[derive(Debug, PartialEq, Clone)]
|
||||
pub struct LoggingStatement {
|
||||
pub level: LogLevel,
|
||||
pub path: &'static str,
|
||||
pub line: usize,
|
||||
pub message_parts: &'static [&'static str],
|
||||
pub placeholders: &'static [&'static str],
|
||||
pub regex: &'static str,
|
||||
}
|
||||
|
|
|
|||
2
logging-extractor/Cargo.lock
generated
2
logging-extractor/Cargo.lock
generated
|
|
@ -115,6 +115,8 @@ dependencies = [
|
|||
"databake",
|
||||
"insta",
|
||||
"memchr",
|
||||
"regex",
|
||||
"regex-syntax",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"test-case",
|
||||
|
|
|
|||
|
|
@ -20,6 +20,8 @@ tree-sitter = "0.22.6"
|
|||
tree-sitter-php = "0.22.7"
|
||||
memchr = "2.7.4"
|
||||
databake = { version = "0.1.8", features = ["derive"] }
|
||||
regex-syntax = "0.8.4"
|
||||
regex = "1.10.5"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.1.6"
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
use databake::Bake;
|
||||
use std::borrow::Cow;
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Clone, Copy, Bake)]
|
||||
#[databake(path = crate)]
|
||||
|
|
@ -40,34 +39,43 @@ pub struct LoggingStatement<'a> {
|
|||
pub level: LogLevel,
|
||||
pub path: &'a str,
|
||||
pub line: usize,
|
||||
pub message_parts: &'a [&'a str],
|
||||
pub placeholders: &'a [&'a str],
|
||||
pub regex: &'a str,
|
||||
}
|
||||
|
||||
fn build_pattern<'a>(parts: &[crate::MessagePart]) -> String {
|
||||
let mut pattern = String::with_capacity(128);
|
||||
pattern.push('^');
|
||||
for part in parts {
|
||||
match part {
|
||||
crate::MessagePart::Literal(literal) => {
|
||||
pattern.push_str(®ex_syntax::escape(literal))
|
||||
}
|
||||
crate::MessagePart::PlaceHolder(_placeholder) => {
|
||||
pattern.push_str("(.*)");
|
||||
}
|
||||
}
|
||||
}
|
||||
pattern.push('$');
|
||||
pattern
|
||||
}
|
||||
|
||||
pub fn bake_statement(output: &mut String, statement: &crate::LoggingStatement) {
|
||||
let message_parts: Vec<_> = statement.message_parts.iter().map(Cow::as_ref).collect();
|
||||
let placeholders: Vec<_> = statement
|
||||
.message_parts
|
||||
.iter()
|
||||
.filter_map(|part| match part {
|
||||
crate::MessagePart::PlaceHolder(placeholder) => Some(placeholder.as_str()),
|
||||
_ => None,
|
||||
})
|
||||
.collect();
|
||||
let pattern = build_pattern(&statement.message_parts);
|
||||
let statement = LoggingStatement {
|
||||
level: statement.level.into(),
|
||||
path: statement.path,
|
||||
line: statement.line,
|
||||
message_parts: &message_parts,
|
||||
placeholders: &placeholders,
|
||||
regex: &pattern,
|
||||
};
|
||||
output.push_str(&statement.bake(&Default::default()).to_string());
|
||||
}
|
||||
|
||||
#[cfg(feature = "bake")]
|
||||
mod bake_test {
|
||||
#[test]
|
||||
fn test_bake() {
|
||||
use databake::test_bake;
|
||||
test_bake!(
|
||||
crate::LoggingStatement,
|
||||
const: crate::LoggingStatement {
|
||||
level: crate::LogLevel::Debug,
|
||||
path: "foo",
|
||||
line: 12usize,
|
||||
message_parts: &["part1", "part2"]
|
||||
},
|
||||
cloud_log_analyser,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,13 +1,11 @@
|
|||
use crate::string::{unescape, DoubleQuoteString, SingleQuoteString};
|
||||
use crate::{LogLevel, LoggingStatement};
|
||||
use std::borrow::Cow;
|
||||
use crate::{LogLevel, LoggingStatement, MessagePart};
|
||||
use tree_sitter::{Language, Node, Parser, Query, QueryCursor};
|
||||
|
||||
pub struct LogExtractor {
|
||||
language: Language,
|
||||
method_query: Query,
|
||||
throw_query: Query,
|
||||
string_query: Query,
|
||||
}
|
||||
|
||||
impl LogExtractor {
|
||||
|
|
@ -30,13 +28,10 @@ impl LogExtractor {
|
|||
)"#,
|
||||
)
|
||||
.expect("invalid query");
|
||||
let string_query = Query::new(&language, r#"[(string_content)(escape_sequence)]@string"#)
|
||||
.expect("invalid query");
|
||||
LogExtractor {
|
||||
language,
|
||||
method_query,
|
||||
throw_query,
|
||||
string_query,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -56,41 +51,53 @@ impl LogExtractor {
|
|||
|
||||
let mut log_call_cursor = QueryCursor::new();
|
||||
let mut throw_call_cursor = QueryCursor::new();
|
||||
let mut tree_cursor = tree.walk();
|
||||
let log_calls = self.get_log_calls(&mut log_call_cursor, code, tree.root_node());
|
||||
let throw_calls = self.get_throw_calls(&mut throw_call_cursor, code, tree.root_node());
|
||||
let mut all = log_calls
|
||||
.chain(throw_calls)
|
||||
.map(|call| {
|
||||
let mut string_cursor = QueryCursor::new();
|
||||
let message_parts = string_cursor
|
||||
.matches(&self.string_query, call.arguments, code.as_bytes())
|
||||
.map(|result| {
|
||||
let node = result.captures[0].node;
|
||||
let raw = node.utf8_text(code.as_bytes()).unwrap_or("malformed utf8");
|
||||
.filter_map(|call| {
|
||||
let argument = call.arguments.child(0)?;
|
||||
if argument.grammar_name() != "string"
|
||||
&& argument.grammar_name() != "encapsed_string"
|
||||
{
|
||||
return None;
|
||||
}
|
||||
let mut argument_string_parts = argument.children(&mut tree_cursor);
|
||||
let is_double_quote = argument_string_parts.next()?.grammar_name() == r#"""#;
|
||||
let mut message_builder =
|
||||
MessageBuilder::with_capacity(argument_string_parts.len());
|
||||
|
||||
if raw.contains('\\') {
|
||||
let start_char =
|
||||
code.as_bytes()[node.parent().unwrap().byte_range().start];
|
||||
Cow::Owned(
|
||||
if start_char == b'"' {
|
||||
unescape::<DoubleQuoteString>(raw)
|
||||
} else {
|
||||
unescape::<SingleQuoteString>(raw)
|
||||
}
|
||||
.unwrap(),
|
||||
)
|
||||
} else {
|
||||
Cow::Borrowed(raw)
|
||||
for string_part in argument_string_parts {
|
||||
match string_part.grammar_name() {
|
||||
"string_content" => {
|
||||
let content = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
message_builder.push_literal(content);
|
||||
}
|
||||
})
|
||||
.collect();
|
||||
"escape_sequence" => {
|
||||
let raw = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
let content = if is_double_quote {
|
||||
unescape::<DoubleQuoteString>(raw)
|
||||
} else {
|
||||
unescape::<SingleQuoteString>(raw)
|
||||
}
|
||||
.unwrap();
|
||||
message_builder.push_literal(&content);
|
||||
}
|
||||
r#"'"# | r#"""# | r#"{"# | r#"}"# => {}
|
||||
_ => {
|
||||
let placeholder = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
message_builder.push_placeholder(placeholder);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LoggingStatement {
|
||||
Some(LoggingStatement {
|
||||
level: call.level,
|
||||
line: call.line + 1,
|
||||
path,
|
||||
message_parts,
|
||||
}
|
||||
message_parts: message_builder.0,
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
|
|
@ -155,13 +162,33 @@ struct LogCall<'tree> {
|
|||
arguments: Node<'tree>,
|
||||
}
|
||||
|
||||
struct MessageBuilder(Vec<MessagePart>);
|
||||
|
||||
impl MessageBuilder {
|
||||
pub fn with_capacity(cap: usize) -> Self {
|
||||
MessageBuilder(Vec::with_capacity(cap))
|
||||
}
|
||||
|
||||
pub fn push_literal(&mut self, content: &str) {
|
||||
if let Some(MessagePart::Literal(last_part)) = self.0.last_mut() {
|
||||
last_part.push_str(content);
|
||||
} else {
|
||||
self.0.push(MessagePart::Literal(content.into()))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_placeholder(&mut self, placeholder: &str) {
|
||||
self.0.push(MessagePart::PlaceHolder(placeholder.into()));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_extract_logging() {
|
||||
let code = r#"<?php
|
||||
function test() {
|
||||
$this->logger->warning("failed to find trash item for $rootTrashedItemName deleted at $rootTrashedItemDate in folder $groupFolderId", ['app' => 'groupfolders']);
|
||||
$logger->info("foobar");
|
||||
throw new FooException("foo \"bar\" \' {$blarg}");
|
||||
$logger->info('foobar');
|
||||
throw new FooException("foo \"bar\" \' {$this->blarg}");
|
||||
}
|
||||
?>
|
||||
"#;
|
||||
|
|
@ -174,9 +201,12 @@ fn test_extract_logging() {
|
|||
line: 3,
|
||||
level: LogLevel::Warn,
|
||||
message_parts: vec![
|
||||
"failed to find trash item for ".into(),
|
||||
" deleted at ".into(),
|
||||
" in folder ".into()
|
||||
MessagePart::Literal("failed to find trash item for ".into()),
|
||||
MessagePart::PlaceHolder("$rootTrashedItemName".into()),
|
||||
MessagePart::Literal(" deleted at ".into()),
|
||||
MessagePart::PlaceHolder("$rootTrashedItemDate".into()),
|
||||
MessagePart::Literal(" in folder ".into()),
|
||||
MessagePart::PlaceHolder("$groupFolderId".into()),
|
||||
]
|
||||
}
|
||||
);
|
||||
|
|
@ -186,7 +216,7 @@ fn test_extract_logging() {
|
|||
path: "foo.php",
|
||||
line: 4,
|
||||
level: LogLevel::Info,
|
||||
message_parts: vec!["foobar".into()]
|
||||
message_parts: vec![MessagePart::Literal("foobar".into())]
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
|
|
@ -196,11 +226,8 @@ fn test_extract_logging() {
|
|||
line: 5,
|
||||
level: LogLevel::Exception,
|
||||
message_parts: vec![
|
||||
"foo ".into(),
|
||||
"\"".into(),
|
||||
"bar".into(),
|
||||
"\"".into(),
|
||||
" \\' ".into()
|
||||
MessagePart::Literal(r#"foo "bar" \' "#.into()),
|
||||
MessagePart::PlaceHolder("$this->blarg".into())
|
||||
]
|
||||
}
|
||||
);
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
use crate::error::Error;
|
||||
use crate::extractor::LogExtractor;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::borrow::Cow;
|
||||
use std::fs::File;
|
||||
use std::io::{Read, Write};
|
||||
use tracing::error;
|
||||
|
|
@ -21,7 +20,13 @@ pub struct LoggingStatement<'a> {
|
|||
level: LogLevel,
|
||||
path: &'a str,
|
||||
line: usize,
|
||||
message_parts: Vec<Cow<'a, str>>,
|
||||
message_parts: Vec<MessagePart>,
|
||||
}
|
||||
|
||||
/// One segment of an extracted log message.
///
/// A message is stored as an ordered list of these parts. With the derived
/// serde implementation each part serializes as a single-key object, e.g.
/// `{"Literal": "Invalid backend"}`.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub enum MessagePart {
|
||||
/// Text that appears verbatim in the message.
Literal(String),
|
||||
/// Source text of an interpolated expression, e.g. `$groupFolderId`.
PlaceHolder(String),
|
||||
}
|
||||
|
||||
pub fn extract_dir<W: Write>(root: &str, mut output: W, bake: bool) -> Result<(), Error> {
|
||||
|
|
|
|||
|
|
@ -8,16 +8,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 129,
|
||||
"message_parts": [
|
||||
"invalid share type!"
|
||||
]
|
||||
},
|
||||
{
|
||||
"level": "exception",
|
||||
"path": "/DefaultShareProvider.php",
|
||||
"line": 305,
|
||||
"message_parts": [
|
||||
"Group \"",
|
||||
"\" does not exist"
|
||||
{
|
||||
"Literal": "invalid share type!"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -25,7 +18,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 309,
|
||||
"message_parts": [
|
||||
"Recipient not in receiving group"
|
||||
{
|
||||
"Literal": "Recipient not in receiving group"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -33,7 +28,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 339,
|
||||
"message_parts": [
|
||||
"Recipient does not match"
|
||||
{
|
||||
"Literal": "Recipient does not match"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -41,16 +38,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 344,
|
||||
"message_parts": [
|
||||
"Invalid shareType"
|
||||
]
|
||||
},
|
||||
{
|
||||
"level": "exception",
|
||||
"path": "/DefaultShareProvider.php",
|
||||
"line": 431,
|
||||
"message_parts": [
|
||||
"Group \"",
|
||||
"\" does not exist"
|
||||
{
|
||||
"Literal": "Invalid shareType"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -58,7 +48,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 476,
|
||||
"message_parts": [
|
||||
"Recipient does not match"
|
||||
{
|
||||
"Literal": "Recipient does not match"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -66,7 +58,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 482,
|
||||
"message_parts": [
|
||||
"Invalid shareType"
|
||||
{
|
||||
"Literal": "Invalid shareType"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -74,7 +68,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 609,
|
||||
"message_parts": [
|
||||
"non-shallow getSharesInFolder is no longer supported"
|
||||
{
|
||||
"Literal": "non-shallow getSharesInFolder is no longer supported"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -82,41 +78,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 963,
|
||||
"message_parts": [
|
||||
"Invalid backend"
|
||||
]
|
||||
},
|
||||
{
|
||||
"level": "error",
|
||||
"path": "/DefaultShareProvider.php",
|
||||
"line": 1171,
|
||||
"message_parts": []
|
||||
},
|
||||
{
|
||||
"level": "debug",
|
||||
"path": "/DefaultShareProvider.php",
|
||||
"line": 1392,
|
||||
"message_parts": [
|
||||
"Share notification not sent to ",
|
||||
" because user could not be found."
|
||||
]
|
||||
},
|
||||
{
|
||||
"level": "debug",
|
||||
"path": "/DefaultShareProvider.php",
|
||||
"line": 1401,
|
||||
"message_parts": [
|
||||
"Share notification not sent to ",
|
||||
" because email address is not set."
|
||||
]
|
||||
},
|
||||
{
|
||||
"level": "debug",
|
||||
"path": "/DefaultShareProvider.php",
|
||||
"line": 1416,
|
||||
"message_parts": [
|
||||
"Sent share notification to ",
|
||||
" for share with ID ",
|
||||
"."
|
||||
{
|
||||
"Literal": "Invalid backend"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
@ -124,16 +88,9 @@ expression: output
|
|||
"path": "/DefaultShareProvider.php",
|
||||
"line": 1420,
|
||||
"message_parts": [
|
||||
"Share notification mail could not be sent."
|
||||
]
|
||||
},
|
||||
{
|
||||
"level": "error",
|
||||
"path": "/DefaultShareProvider.php",
|
||||
"line": 1507,
|
||||
"message_parts": [
|
||||
"Share notification mail could not be sent to: ",
|
||||
", "
|
||||
{
|
||||
"Literal": "Share notification mail could not be sent."
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use cloud_log_analyser_data::{LogLevel, LoggingStatement};
|
||||
use regex::{escape, Regex, RegexBuilder};
|
||||
use regex::Regex;
|
||||
|
||||
pub struct LogMatch {
|
||||
level: LogLevel,
|
||||
|
|
@ -11,8 +11,8 @@ impl LogMatch {
|
|||
pub fn new(statement: &LoggingStatement) -> LogMatch {
|
||||
LogMatch {
|
||||
level: statement.level,
|
||||
pattern: build_pattern(statement.message_parts),
|
||||
pattern_length: statement.message_parts.iter().copied().map(str::len).sum(),
|
||||
pattern: Regex::new(statement.regex).unwrap(),
|
||||
pattern_length: statement.regex.len(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -48,25 +48,6 @@ impl Matcher {
|
|||
}
|
||||
}
|
||||
|
||||
fn build_pattern<'a>(parts: &[&str]) -> Regex {
|
||||
let mut pattern = String::with_capacity(128);
|
||||
for part in parts {
|
||||
pattern.push_str(&escape(part));
|
||||
pattern.push_str("(.*)");
|
||||
}
|
||||
RegexBuilder::new(&pattern)
|
||||
.build()
|
||||
.expect("Failed to build regex")
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_build_pattern() {
|
||||
let regex = build_pattern(["foobar", "asd"]);
|
||||
assert!(regex.is_match("foobar with asd and more"));
|
||||
assert!(regex.is_match("foobarasd"));
|
||||
assert!(!regex.is_match("fooasd"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_matcher() {
|
||||
let statements = &[
|
||||
|
|
@ -74,32 +55,29 @@ fn test_matcher() {
|
|||
line: 68,
|
||||
level: LogLevel::Exception,
|
||||
path: "foo",
|
||||
message_parts: vec!["Not allowed to rename a shared album".into()],
|
||||
placeholders: &[],
|
||||
regex: "^Not allowed to rename a shared album$",
|
||||
},
|
||||
LoggingStatement {
|
||||
line: 69,
|
||||
level: LogLevel::Error,
|
||||
path: "bar",
|
||||
message_parts: vec![
|
||||
"You are not allowed to edit link shares that you don".into(),
|
||||
"'".into(),
|
||||
"t own".into(),
|
||||
],
|
||||
placeholders: &[],
|
||||
regex: "^You are not allowed to edit link shares that you don't own$",
|
||||
},
|
||||
LoggingStatement {
|
||||
line: 69,
|
||||
level: LogLevel::Error,
|
||||
path: "asd",
|
||||
message_parts: vec![
|
||||
"Unsupported query value for mimetype: ".into(),
|
||||
", only values in the format \"mime/type\" or \"mime/%\" are supported".into(),
|
||||
],
|
||||
placeholders: &["$mimeType"],
|
||||
regex: r#"^Unsupported query value for mimetype: (.*), only values in the format "mime/type" or "mime/%" are supported$"#,
|
||||
},
|
||||
LoggingStatement {
|
||||
line: 68,
|
||||
level: LogLevel::Exception,
|
||||
path: "short",
|
||||
message_parts: vec!["Not allowed to rename".into()],
|
||||
placeholders: &["$path"],
|
||||
regex: "^Not allowed to rename (.*)$",
|
||||
},
|
||||
];
|
||||
let matcher = Matcher::new(statements);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue