improve matching

This commit is contained in:
Robin Appelman 2024-07-22 18:57:18 +02:00
commit 9413b216ba
24 changed files with 73837 additions and 21689 deletions

535
Cargo.lock generated
View file

@ -2,6 +2,23 @@
# It is not intended for manual editing.
version = 3
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "aes"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0"
dependencies = [
"cfg-if",
"cipher",
"cpufeatures",
]
[[package]]
name = "aho-corasick"
version = "1.1.3"
@ -11,12 +28,89 @@ dependencies = [
"memchr",
]
[[package]]
name = "arbitrary"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110"
dependencies = [
"derive_arbitrary",
]
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "block-buffer"
version = "0.10.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
dependencies = [
"generic-array",
]
[[package]]
name = "bumpalo"
version = "3.16.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c"
[[package]]
name = "byteorder"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
[[package]]
name = "bzip2"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8"
dependencies = [
"bzip2-sys",
"libc",
]
[[package]]
name = "bzip2-sys"
version = "0.1.11+1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
dependencies = [
"cc",
"libc",
"pkg-config",
]
[[package]]
name = "cc"
version = "1.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f"
dependencies = [
"jobserver",
"libc",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "cipher"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad"
dependencies = [
"crypto-common",
"inout",
]
[[package]]
name = "clap"
version = "4.1.3"
@ -60,17 +154,172 @@ version = "0.1.0"
dependencies = [
"clap",
"cloud-log-analyser-data",
"itertools",
"log",
"main_error",
"regex",
"serde",
"serde_json",
"thiserror",
"zip",
]
[[package]]
name = "cloud-log-analyser-data"
version = "0.1.0"
[[package]]
name = "constant_time_eq"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2"
[[package]]
name = "cpufeatures"
version = "0.2.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504"
dependencies = [
"libc",
]
[[package]]
name = "crc"
version = "3.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636"
dependencies = [
"crc-catalog",
]
[[package]]
name = "crc-catalog"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
[[package]]
name = "crc32fast"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
[[package]]
name = "crypto-common"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3"
dependencies = [
"generic-array",
"typenum",
]
[[package]]
name = "deflate64"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da692b8d1080ea3045efaab14434d40468c3d8657e42abddfffca87b428f4c1b"
[[package]]
name = "deranged"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4"
dependencies = [
"powerfmt",
]
[[package]]
name = "derive_arbitrary"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.71",
]
[[package]]
name = "digest"
version = "0.10.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
dependencies = [
"block-buffer",
"crypto-common",
"subtle",
]
[[package]]
name = "displaydoc"
version = "0.2.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.71",
]
[[package]]
name = "either"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
[[package]]
name = "equivalent"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
[[package]]
name = "flate2"
version = "1.0.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae"
dependencies = [
"crc32fast",
"miniz_oxide",
]
[[package]]
name = "generic-array"
version = "0.14.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
dependencies = [
"typenum",
"version_check",
]
[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.14.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1"
[[package]]
name = "heck"
version = "0.4.1"
@ -83,6 +332,34 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
[[package]]
name = "hmac"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e"
dependencies = [
"digest",
]
[[package]]
name = "indexmap"
version = "2.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26"
dependencies = [
"equivalent",
"hashbrown",
]
[[package]]
name = "inout"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5"
dependencies = [
"generic-array",
]
[[package]]
name = "is-terminal"
version = "0.4.12"
@ -94,30 +371,85 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "itertools"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
dependencies = [
"either",
]
[[package]]
name = "itoa"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]]
name = "jobserver"
version = "0.1.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e"
dependencies = [
"libc",
]
[[package]]
name = "libc"
version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]]
name = "lockfree-object-pool"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e"
[[package]]
name = "log"
version = "0.4.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24"
[[package]]
name = "lzma-rs"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "297e814c836ae64db86b36cf2a557ba54368d03f6afcd7d947c266692f71115e"
dependencies = [
"byteorder",
"crc",
]
[[package]]
name = "main_error"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "155db5e86c6e45ee456bf32fad5a290ee1f7151c2faca27ea27097568da67d1a"
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "miniz_oxide"
version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08"
dependencies = [
"adler",
]
[[package]]
name = "num-conv"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9"
[[package]]
name = "once_cell"
version = "1.19.0"
@ -130,6 +462,34 @@ version = "6.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1"
[[package]]
name = "pbkdf2"
version = "0.12.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ed6a7761f76e3b9f92dfb0a60a6a6477c61024b775147ff0973a02653abaf2"
dependencies = [
"digest",
"hmac",
]
[[package]]
name = "pkg-config"
version = "0.3.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
[[package]]
name = "powerfmt"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
[[package]]
name = "ppv-lite86"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "proc-macro-error"
version = "1.0.4"
@ -172,6 +532,36 @@ dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "regex"
version = "1.10.5"
@ -238,12 +628,35 @@ dependencies = [
"serde",
]
[[package]]
name = "sha1"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba"
dependencies = [
"cfg-if",
"cpufeatures",
"digest",
]
[[package]]
name = "simd-adler32"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "subtle"
version = "2.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
[[package]]
name = "syn"
version = "1.0.109"
@ -295,6 +708,31 @@ dependencies = [
"syn 2.0.71",
]
[[package]]
name = "time"
version = "0.3.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885"
dependencies = [
"deranged",
"num-conv",
"powerfmt",
"serde",
"time-core",
]
[[package]]
name = "time-core"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
[[package]]
name = "typenum"
version = "1.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
[[package]]
name = "unicode-ident"
version = "1.0.12"
@ -307,6 +745,12 @@ version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "winapi-util"
version = "0.1.8"
@ -388,3 +832,94 @@ name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
[[package]]
name = "zeroize"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde"
dependencies = [
"zeroize_derive",
]
[[package]]
name = "zeroize_derive"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.71",
]
[[package]]
name = "zip"
version = "2.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b895748a3ebcb69b9d38dcfdf21760859a4b0d0b0015277640c2ef4c69640e6f"
dependencies = [
"aes",
"arbitrary",
"bzip2",
"constant_time_eq",
"crc32fast",
"crossbeam-utils",
"deflate64",
"displaydoc",
"flate2",
"hmac",
"indexmap",
"lzma-rs",
"memchr",
"pbkdf2",
"rand",
"sha1",
"thiserror",
"time",
"zeroize",
"zopfli",
"zstd",
]
[[package]]
name = "zopfli"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e5019f391bac5cf252e93bbcc53d039ffd62c7bfb7c150414d61369afe57e946"
dependencies = [
"bumpalo",
"crc32fast",
"lockfree-object-pool",
"log",
"once_cell",
"simd-adler32",
]
[[package]]
name = "zstd"
version = "0.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9"
dependencies = [
"zstd-safe",
]
[[package]]
name = "zstd-safe"
version = "7.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa556e971e7b568dc775c136fc9de8c779b1c2fc3a63defaafadffdbd3181afa"
dependencies = [
"zstd-sys",
]
[[package]]
name = "zstd-sys"
version = "2.0.12+zstd.1.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13"
dependencies = [
"cc",
"pkg-config",
]

View file

@ -4,6 +4,7 @@ version = "0.1.0"
edition = "2021"
[dependencies]
main_error = "0.1.2"
thiserror = "1.0.63"
serde = { version = "1.0.204", features = ["derive"] }
serde_json = "1.0.120"
@ -11,6 +12,8 @@ regex = "1.10.5"
log = "0.4.22"
clap = { version = "=4.1.3", features = ["derive"] }
cloud-log-analyser-data = { version = "0.1.0", path = "./data" }
zip = "2.1.5"
itertools = "0.13.0"
[profile.dev.package."*"]
opt-level = 3

View file

@ -5,6 +5,9 @@ mod server_27;
mod server_28;
mod server_29;
pub const MIN_VERSION: u32 = 24;
pub const MAX_VERSION: u32 = 29;
pub fn get_statements(name: &str, version: u32) -> &[crate::LoggingStatement] {
match (name, version) {
("server", 24) => server_24::STATEMENTS,

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,5 +1,5 @@
mod data;
mod types;
pub use data::get_statements;
pub use data::{get_statements, MAX_VERSION, MIN_VERSION};
pub use types::*;

View file

@ -1,3 +1,5 @@
use std::fmt::{Display, Formatter};
#[derive(Debug, Default, PartialEq, Clone, Copy)]
pub enum LogLevel {
Debug,
@ -32,5 +34,36 @@ pub struct LoggingStatement {
pub path: &'static str,
pub line: usize,
pub placeholders: &'static [&'static str],
pub has_meaningful_message: bool,
pub exception: Option<&'static str>,
pub regex: &'static str,
}
impl LoggingStatement {
    /// Returns a lazily rendered, human-readable form of this statement.
    ///
    /// The returned value borrows `self`; no text is produced until it is
    /// formatted (for example with `{}` or `to_string()`), at which point the
    /// `Display` implementation of `LoggingMessage` does the rendering.
    pub fn message(&self) -> impl Display + '_ {
        // `self` is already a reference, so it is passed through as-is instead
        // of being borrowed a second time.
        LoggingMessage { message: self }
    }
}
/// Borrowed view of a [`LoggingStatement`] that renders it as readable text
/// when formatted with `Display`.
struct LoggingMessage<'a> {
    message: &'a LoggingStatement,
}

impl<'a> Display for LoggingMessage<'a> {
    /// Writes the statement's pattern with every `(.*)` capture group replaced
    /// by the placeholder at the same position.
    ///
    /// An empty pattern produces no output. A leading `^` and a trailing `$`
    /// are removed when present. Capture groups without a matching entry in
    /// `placeholders` are rendered as nothing.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let pattern = self.message.regex;
        if pattern.is_empty() {
            return Ok(());
        }

        // Strip the anchors by value rather than by byte offset: slicing
        // `[1..len - 1]` panics on a one-byte pattern and on multi-byte
        // boundary characters, and would drop characters that are not anchors.
        let body = pattern.strip_prefix('^').unwrap_or(pattern);
        let body = body.strip_suffix('$').unwrap_or(body);

        // Interleave the literal segments with the placeholders, in order.
        let mut placeholders = self.message.placeholders.iter();
        for part in body.split("(.*)") {
            f.write_str(part)?;
            if let Some(placeholder) = placeholders.next() {
                f.write_str(placeholder)?;
            }
        }
        Ok(())
    }
}

View file

@ -107,7 +107,7 @@
releaseMatrix = buildMatrix releaseTargets;
devShells.default = mkShell {
nativeBuildInputs = with pkgs; [cargo rustc bacon cargo-msrv cargo-insta];
nativeBuildInputs = with pkgs; [cargo rustc bacon cargo-msrv cargo-insta clippy];
};
}
)

View file

@ -40,10 +40,12 @@ pub struct LoggingStatement<'a> {
pub path: &'a str,
pub line: usize,
pub placeholders: &'a [&'a str],
pub has_meaningful_message: bool,
pub exception: Option<&'a str>,
pub regex: &'a str,
}
fn build_pattern<'a>(parts: &[crate::MessagePart]) -> String {
fn build_pattern(parts: &[crate::MessagePart]) -> String {
let mut pattern = String::with_capacity(128);
pattern.push('^');
for part in parts {
@ -74,6 +76,8 @@ pub fn bake_statement(output: &mut String, statement: &crate::LoggingStatement)
level: statement.level.into(),
path: statement.path,
line: statement.line,
has_meaningful_message: statement.has_meaningful_message,
exception: statement.exception.as_deref(),
placeholders: &placeholders,
regex: &pattern,
};

View file

@ -1,5 +1,5 @@
use crate::string::{unescape, DoubleQuoteString, SingleQuoteString};
use crate::{LogLevel, LoggingStatement, MessagePart};
use crate::messagebuilder::MessageBuilder;
use crate::{LogLevel, LoggingStatement};
use tree_sitter::{Language, Node, Parser, Query, QueryCursor};
pub struct LogExtractor {
@ -15,7 +15,7 @@ impl LogExtractor {
&language,
r#"(member_call_expression
name: (name)@name
arguments: (arguments ((argument)+ @arg))
arguments: (arguments) @args
)"#,
)
.expect("invalid query");
@ -23,7 +23,8 @@ impl LogExtractor {
&language,
r#"(throw_expression
(object_creation_expression
(arguments ((argument)+ @arg))
[(name) (qualified_name)] @name
(arguments) @args
)
)"#,
)
@ -51,52 +52,25 @@ impl LogExtractor {
let mut log_call_cursor = QueryCursor::new();
let mut throw_call_cursor = QueryCursor::new();
let mut tree_cursor = tree.walk();
let log_calls = self.get_log_calls(&mut log_call_cursor, code, tree.root_node());
let throw_calls = self.get_throw_calls(&mut throw_call_cursor, code, tree.root_node());
let mut all = log_calls
.chain(throw_calls)
.filter_map(|call| {
let argument = call.arguments.child(0)?;
if argument.grammar_name() != "string"
&& argument.grammar_name() != "encapsed_string"
{
return None;
}
let mut argument_string_parts = argument.children(&mut tree_cursor);
let is_double_quote = argument_string_parts.next()?.grammar_name() == r#"""#;
let mut message_builder =
MessageBuilder::with_capacity(argument_string_parts.len());
let mut message_builder = MessageBuilder::with_capacity(16);
for string_part in argument_string_parts {
match string_part.grammar_name() {
"string_content" => {
let content = string_part.utf8_text(code.as_bytes()).unwrap();
message_builder.push_literal(content);
}
"escape_sequence" => {
let raw = string_part.utf8_text(code.as_bytes()).unwrap();
let content = if is_double_quote {
unescape::<DoubleQuoteString>(raw)
} else {
unescape::<SingleQuoteString>(raw)
}
.unwrap();
message_builder.push_literal(&content);
}
r#"'"# | r#"""# | r#"{"# | r#"}"# => {}
_ => {
let placeholder = string_part.utf8_text(code.as_bytes()).unwrap();
message_builder.push_placeholder(placeholder);
}
}
if let Some(argument) = call.arguments {
let argument = argument.child(0)?;
message_builder.push_node(argument, code);
}
Some(LoggingStatement {
level: call.level,
line: call.line + 1,
path,
message_parts: message_builder.0,
has_meaningful_message: message_builder.is_meaningful(),
exception: call.exception,
message_parts: message_builder.into(),
})
})
.collect::<Vec<_>>();
@ -120,11 +94,13 @@ impl LogExtractor {
.unwrap_or("malformed utf8");
let level = LogLevel::parse(name)?;
let line = method_call.captures[0].node.start_position().row;
let arguments = method_call.captures[1].node;
Some(LogCall {
level,
line,
arguments,
arguments: arguments.named_child(0),
exception: None,
})
})
}
@ -137,15 +113,22 @@ impl LogExtractor {
) -> impl Iterator<Item = LogCall> + 'a {
let throws = cursor.matches(&self.throw_query, node, code.as_bytes());
throws.filter_map(|method_call| {
throws.map(|method_call| {
let level = LogLevel::Exception;
let arguments = method_call.captures[0].node;
let arguments = method_call.captures[1].node;
let line = arguments.start_position().row;
Some(LogCall {
LogCall {
level,
line,
arguments,
})
arguments: arguments.named_child(0),
exception: Some(
method_call.captures[0]
.node
.utf8_text(code.as_bytes())
.unwrap()
.into(),
),
}
})
}
}
@ -159,36 +142,21 @@ impl Default for LogExtractor {
struct LogCall<'tree> {
level: LogLevel,
line: usize,
arguments: Node<'tree>,
}
struct MessageBuilder(Vec<MessagePart>);
impl MessageBuilder {
pub fn with_capacity(cap: usize) -> Self {
MessageBuilder(Vec::with_capacity(cap))
}
pub fn push_literal(&mut self, content: &str) {
if let Some(MessagePart::Literal(last_part)) = self.0.last_mut() {
last_part.push_str(content);
} else {
self.0.push(MessagePart::Literal(content.into()))
}
}
pub fn push_placeholder(&mut self, placeholder: &str) {
self.0.push(MessagePart::PlaceHolder(placeholder.into()));
}
exception: Option<String>,
arguments: Option<Node<'tree>>,
}
#[test]
fn test_extract_logging() {
use crate::MessagePart;
let code = r#"<?php
function test() {
$this->logger->warning("failed to find trash item for $rootTrashedItemName deleted at $rootTrashedItemDate in folder $groupFolderId", ['app' => 'groupfolders']);
$logger->info('foobar');
throw new FooException("foo \"bar\" \' {$this->blarg}");
throw new BarException();
$this->logger->error('Share notification mail could not be sent to: ' . implode(', ', $failedRecipients));
}
?>
"#;
@ -200,6 +168,8 @@ fn test_extract_logging() {
path: "foo.php",
line: 3,
level: LogLevel::Warn,
has_meaningful_message: true,
exception: None,
message_parts: vec![
MessagePart::Literal("failed to find trash item for ".into()),
MessagePart::PlaceHolder("$rootTrashedItemName".into()),
@ -216,6 +186,8 @@ fn test_extract_logging() {
path: "foo.php",
line: 4,
level: LogLevel::Info,
has_meaningful_message: true,
exception: None,
message_parts: vec![MessagePart::Literal("foobar".into())]
}
);
@ -225,10 +197,37 @@ fn test_extract_logging() {
path: "foo.php",
line: 5,
level: LogLevel::Exception,
has_meaningful_message: true,
exception: Some("FooException".into()),
message_parts: vec![
MessagePart::Literal(r#"foo "bar" \' "#.into()),
MessagePart::PlaceHolder("$this->blarg".into())
]
}
);
assert_eq!(
logs[3],
LoggingStatement {
path: "foo.php",
line: 6,
level: LogLevel::Exception,
has_meaningful_message: false,
exception: Some("BarException".into()),
message_parts: vec![]
}
);
assert_eq!(
logs[4],
LoggingStatement {
path: "foo.php",
line: 7,
level: LogLevel::Error,
has_meaningful_message: true,
exception: None,
message_parts: vec![
MessagePart::Literal("Share notification mail could not be sent to: ".into()),
MessagePart::PlaceHolder("implode(', ', $failedRecipients)".into())
]
}
);
}

View file

@ -30,6 +30,7 @@ impl LogLevel {
"emergency" => Some(LogLevel::Emergency),
"exception" => Some(LogLevel::Exception),
"log" => Some(LogLevel::Unknown),
"printErrorPage" => Some(LogLevel::Unknown),
_ => None,
}
}

View file

@ -10,6 +10,7 @@ mod bake;
pub mod error;
pub mod extractor;
mod level;
mod messagebuilder;
pub mod string;
use crate::bake::bake_statement;
@ -20,6 +21,8 @@ pub struct LoggingStatement<'a> {
level: LogLevel,
path: &'a str,
line: usize,
has_meaningful_message: bool,
exception: Option<String>,
message_parts: Vec<MessagePart>,
}

View file

@ -0,0 +1,91 @@
use crate::string::{unescape, DoubleQuoteString, SingleQuoteString};
use crate::MessagePart;
use tree_sitter::Node;
/// Accumulates the parts of a log message, either pushed directly as literals
/// and placeholders or derived from a syntax-tree node via `push_node`.
pub struct MessageBuilder {
/// The parts collected so far, in the order they were pushed.
pub parts: Vec<MessagePart>,
}
impl MessageBuilder {
    /// Creates an empty builder with room for `cap` parts.
    pub fn with_capacity(cap: usize) -> Self {
        MessageBuilder {
            parts: Vec::with_capacity(cap),
        }
    }

    /// Appends literal text.
    ///
    /// When the last part is already a literal the text is merged into it, so
    /// consecutive literals never produce separate parts.
    pub fn push_literal(&mut self, content: &str) {
        if let Some(MessagePart::Literal(last_part)) = self.parts.last_mut() {
            last_part.push_str(content);
        } else {
            self.parts.push(MessagePart::Literal(content.into()));
        }
    }

    /// Appends a placeholder holding the given source text.
    pub fn push_placeholder(&mut self, placeholder: &str) {
        self.parts
            .push(MessagePart::PlaceHolder(placeholder.into()));
    }

    /// Appends the message parts described by `node`, whose text lives in `code`.
    ///
    /// * String nodes contribute their content and unescaped escape sequences
    ///   as literals; any other child that is not a quote or brace token
    ///   becomes a placeholder.
    /// * A binary expression using the `.` operator is flattened by pushing
    ///   each named operand recursively.
    /// * Everything else, including binary expressions with another operator,
    ///   becomes a single placeholder containing the node's source text.
    ///
    /// # Panics
    ///
    /// Panics if a node's text cannot be read from `code` or if `unescape`
    /// rejects an escape sequence.
    pub fn push_node(&mut self, node: Node, code: &str) {
        let mut cursor = node.walk();
        match node.grammar_name() {
            "string" | "encapsed_string" => {
                let mut argument_string_parts = node.children(&mut cursor);
                // The first child is the opening quote; it selects the escaping rules.
                let is_double_quote = argument_string_parts
                    .next()
                    .map(|child| child.grammar_name())
                    .unwrap_or_default()
                    == r#"""#;

                for string_part in argument_string_parts {
                    match string_part.grammar_name() {
                        "string_content" => {
                            let content = string_part.utf8_text(code.as_bytes()).unwrap();
                            self.push_literal(content);
                        }
                        "escape_sequence" => {
                            let raw = string_part.utf8_text(code.as_bytes()).unwrap();
                            let content = if is_double_quote {
                                unescape::<DoubleQuoteString>(raw)
                            } else {
                                unescape::<SingleQuoteString>(raw)
                            }
                            .unwrap();
                            self.push_literal(&content);
                        }
                        // Quote and interpolation-brace tokens carry no message text.
                        r#"'"# | r#"""# | r#"{"# | r#"}"# => {}
                        _ => {
                            let placeholder = string_part.utf8_text(code.as_bytes()).unwrap();
                            self.push_placeholder(placeholder);
                        }
                    }
                }
            }
            "binary_expression" => {
                // The operator is the source text between the two named operands.
                let is_concatenation = match (node.named_child(0), node.named_child(1)) {
                    (Some(left), Some(right)) => code
                        .get(left.range().end_byte..right.range().start_byte)
                        .map_or(false, |operator| operator.trim() == "."),
                    _ => false,
                };

                if is_concatenation {
                    for part in node.named_children(&mut cursor) {
                        self.push_node(part, code);
                    }
                } else {
                    // Any other operator yields a runtime value; keep it as a
                    // placeholder instead of silently dropping it from the message.
                    let placeholder = node.utf8_text(code.as_bytes()).unwrap();
                    self.push_placeholder(placeholder);
                }
            }
            _ => {
                let placeholder = node.utf8_text(code.as_bytes()).unwrap();
                self.push_placeholder(placeholder);
            }
        }
    }

    /// Ensure there is at least some text to match
    ///
    /// Returns `true` when at least one literal part contains an ASCII
    /// alphanumeric character.
    pub fn is_meaningful(&self) -> bool {
        self.parts.iter().any(|part| {
            matches!(
                part,
                MessagePart::Literal(text) if text.contains(|c: char| c.is_ascii_alphanumeric())
            )
        })
    }
}
impl From<MessageBuilder> for Vec<MessagePart> {
fn from(value: MessageBuilder) -> Self {
value.parts
}
}

View file

@ -217,6 +217,7 @@ pub fn is_array_key_numeric(string: &str) -> bool {
#[cfg(test)]
mod tests {
#![allow(clippy::needless_raw_string_hashes)]
use super::*;
#[test]

View file

@ -7,16 +7,38 @@ expression: output
"level": "exception",
"path": "/DefaultShareProvider.php",
"line": 129,
"has_meaningful_message": true,
"exception": "\\Exception",
"message_parts": [
{
"Literal": "invalid share type!"
}
]
},
{
"level": "exception",
"path": "/DefaultShareProvider.php",
"line": 305,
"has_meaningful_message": true,
"exception": "ProviderException",
"message_parts": [
{
"Literal": "Group \""
},
{
"PlaceHolder": "$share->getSharedWith()"
},
{
"Literal": "\" does not exist"
}
]
},
{
"level": "exception",
"path": "/DefaultShareProvider.php",
"line": 309,
"has_meaningful_message": true,
"exception": "ProviderException",
"message_parts": [
{
"Literal": "Recipient not in receiving group"
@ -27,6 +49,8 @@ expression: output
"level": "exception",
"path": "/DefaultShareProvider.php",
"line": 339,
"has_meaningful_message": true,
"exception": "ProviderException",
"message_parts": [
{
"Literal": "Recipient does not match"
@ -37,16 +61,38 @@ expression: output
"level": "exception",
"path": "/DefaultShareProvider.php",
"line": 344,
"has_meaningful_message": true,
"exception": "ProviderException",
"message_parts": [
{
"Literal": "Invalid shareType"
}
]
},
{
"level": "exception",
"path": "/DefaultShareProvider.php",
"line": 431,
"has_meaningful_message": true,
"exception": "ProviderException",
"message_parts": [
{
"Literal": "Group \""
},
{
"PlaceHolder": "$share->getSharedWith()"
},
{
"Literal": "\" does not exist"
}
]
},
{
"level": "exception",
"path": "/DefaultShareProvider.php",
"line": 476,
"has_meaningful_message": true,
"exception": "ProviderException",
"message_parts": [
{
"Literal": "Recipient does not match"
@ -57,6 +103,8 @@ expression: output
"level": "exception",
"path": "/DefaultShareProvider.php",
"line": 482,
"has_meaningful_message": true,
"exception": "ProviderException",
"message_parts": [
{
"Literal": "Invalid shareType"
@ -67,30 +115,155 @@ expression: output
"level": "exception",
"path": "/DefaultShareProvider.php",
"line": 609,
"has_meaningful_message": true,
"exception": "\\Exception",
"message_parts": [
{
"Literal": "non-shallow getSharesInFolder is no longer supported"
}
]
},
{
"level": "exception",
"path": "/DefaultShareProvider.php",
"line": 763,
"has_meaningful_message": false,
"exception": "ShareNotFound",
"message_parts": []
},
{
"level": "exception",
"path": "/DefaultShareProvider.php",
"line": 769,
"has_meaningful_message": false,
"exception": "ShareNotFound",
"message_parts": []
},
{
"level": "exception",
"path": "/DefaultShareProvider.php",
"line": 963,
"has_meaningful_message": true,
"exception": "BackendError",
"message_parts": [
{
"Literal": "Invalid backend"
}
]
},
{
"level": "exception",
"path": "/DefaultShareProvider.php",
"line": 993,
"has_meaningful_message": false,
"exception": "ShareNotFound",
"message_parts": []
},
{
"level": "exception",
"path": "/DefaultShareProvider.php",
"line": 999,
"has_meaningful_message": false,
"exception": "ShareNotFound",
"message_parts": []
},
{
"level": "error",
"path": "/DefaultShareProvider.php",
"line": 1171,
"has_meaningful_message": false,
"exception": null,
"message_parts": [
{
"PlaceHolder": "$e->getMessage()"
}
]
},
{
"level": "debug",
"path": "/DefaultShareProvider.php",
"line": 1392,
"has_meaningful_message": true,
"exception": null,
"message_parts": [
{
"Literal": "Share notification not sent to "
},
{
"PlaceHolder": "$share->getSharedWith()"
},
{
"Literal": " because user could not be found."
}
]
},
{
"level": "debug",
"path": "/DefaultShareProvider.php",
"line": 1401,
"has_meaningful_message": true,
"exception": null,
"message_parts": [
{
"Literal": "Share notification not sent to "
},
{
"PlaceHolder": "$share->getSharedWith()"
},
{
"Literal": " because email address is not set."
}
]
},
{
"level": "debug",
"path": "/DefaultShareProvider.php",
"line": 1416,
"has_meaningful_message": true,
"exception": null,
"message_parts": [
{
"Literal": "Sent share notification to "
},
{
"PlaceHolder": "$emailAddress"
},
{
"Literal": " for share with ID "
},
{
"PlaceHolder": "$share->getId()"
},
{
"Literal": "."
}
]
},
{
"level": "error",
"path": "/DefaultShareProvider.php",
"line": 1420,
"has_meaningful_message": true,
"exception": null,
"message_parts": [
{
"Literal": "Share notification mail could not be sent."
}
]
},
{
"level": "error",
"path": "/DefaultShareProvider.php",
"line": 1507,
"has_meaningful_message": true,
"exception": null,
"message_parts": [
{
"Literal": "Share notification mail could not be sent to: "
},
{
"PlaceHolder": "implode(', ', $failedRecipients)"
}
]
}
]

20
src/error.rs Normal file
View file

@ -0,0 +1,20 @@
use thiserror::Error;
use zip::result::ZipError;
/// Error wrapping a failure together with the context needed to report it.
#[derive(Debug, Error)]
pub enum LogError {
/// Reading the input file failed.
///
/// `path` is the file that was being read and `err` the underlying cause; both are
/// included in the rendered message.
#[error("Error while reading input file '{path}': {err:#}")]
Read { err: ReadError, path: String },
}
/// Reasons an input file could not be opened as a log source.
#[derive(Debug, Error)]
pub enum ReadError {
/// An I/O failure; rendered as the wrapped error and convertible from it (e.g. via `?`).
#[error(transparent)]
Io(#[from] std::io::Error),
/// A failure reported by the `zip` crate; rendered as the wrapped error and convertible
/// from it (e.g. via `?`).
#[error(transparent)]
Zip(#[from] ZipError),
/// The archive holds more than one entry.
#[error("archive contains multiple files")]
MultipleFiles,
/// The archive holds no entries.
#[error("archive contains no files")]
NoFiles,
}

40
src/logfile.rs Normal file
View file

@ -0,0 +1,40 @@
use crate::error::ReadError;
use itertools::Either;
use std::fs::File;
use std::io::{BufRead, BufReader};
use zip::ZipArchive;
/// An opened log input: either a plain file or a zip archive.
pub enum LogFile {
/// A regular file behind a buffered reader.
Plain(BufReader<File>),
/// A zip archive backed by a file. `LogFile::open` only produces this variant for
/// archives holding exactly one entry.
Zip(ZipArchive<File>),
}
impl LogFile {
    /// Opens `path` as a log source.
    ///
    /// A path ending in `.zip` is opened as a zip archive, which must hold exactly one
    /// entry; any other path is opened as a plain file.
    ///
    /// # Errors
    ///
    /// Returns [`ReadError::Io`] if the file cannot be opened, [`ReadError::Zip`] if the
    /// archive or its entry cannot be read, [`ReadError::MultipleFiles`] if the archive
    /// holds more than one entry and [`ReadError::NoFiles`] if it holds none.
    pub fn open(path: &str) -> Result<LogFile, ReadError> {
        let file = File::open(path)?;
        if !path.ends_with(".zip") {
            return Ok(LogFile::Plain(BufReader::new(file)));
        }

        let mut zip = ZipArchive::new(file)?;
        if zip.len() > 1 {
            return Err(ReadError::MultipleFiles);
        } else if zip.is_empty() {
            return Err(ReadError::NoFiles);
        }
        // Open the entry once up front so that a broken entry is reported here as an
        // error instead of surfacing later as a panic in `iter`.
        let _ = zip.by_index(0)?;
        Ok(LogFile::Zip(zip))
    }

    /// Returns an iterator over the lines of the log, without their line terminators.
    ///
    /// Lines that are not valid UTF-8 are skipped. Iteration ends at end of input or at
    /// the first I/O error; the error itself is not reported because the item type has
    /// no room for it.
    ///
    /// # Panics
    ///
    /// Panics if the archive entry cannot be opened.
    pub fn iter(&mut self) -> impl Iterator<Item = String> + '_ {
        match self {
            LogFile::Plain(file) => Either::Left(utf8_lines(file)),
            LogFile::Zip(zip) => {
                let file = zip.by_index(0).expect("failed to open zip content again");
                Either::Right(utf8_lines(BufReader::new(file)))
            }
        }
    }
}

/// Yields the valid UTF-8 lines of `reader`, stopping at the first I/O error.
///
/// `lines().flatten()` would discard every `Err` and keep polling, which never
/// terminates when the reader fails persistently (for example when the path is a
/// directory). Reading raw bytes keeps "this line is not UTF-8" (skip it) apart from
/// "the reader is broken" (stop).
fn utf8_lines<R: BufRead>(mut reader: R) -> impl Iterator<Item = String> {
    std::iter::from_fn(move || loop {
        let mut raw = Vec::new();
        match reader.read_until(b'\n', &mut raw) {
            // End of input, or a read error that would only repeat on the next call.
            Ok(0) | Err(_) => return None,
            Ok(_) => {}
        }
        // Strip the terminator the same way `BufRead::lines` does: `\n`, then an optional `\r`.
        if raw.last() == Some(&b'\n') {
            raw.pop();
            if raw.last() == Some(&b'\r') {
                raw.pop();
            }
        }
        if let Ok(line) = String::from_utf8(raw) {
            return Some(line);
        }
        // Not valid UTF-8: drop this line and move on to the next one.
    })
}

20
src/logline.rs Normal file
View file

@ -0,0 +1,20 @@
use serde::Deserialize;
use std::borrow::Cow;
/// The fields of a log entry that are read during deserialization.
#[derive(Deserialize)]
pub struct LogLine<'a> {
/// Version string, borrowed from the deserialized input; `major_version` parses the
/// part before the first `.`.
pub version: &'a str,
/// Numeric log level.
pub level: i64,
/// Log message text.
pub message: Cow<'a, str>,
}
impl LogLine<'_> {
    /// Returns the major version number, i.e. the text of `version` before the first
    /// `.` (or all of `version` when it contains no `.`) parsed as a `u32`.
    ///
    /// Returns `None` when that text is not a valid `u32`.
    pub fn major_version(&self) -> Option<u32> {
        // `split` always yields at least one piece, so the fallback only makes the
        // "no dot" case explicit.
        let leading = self.version.split('.').next().unwrap_or(self.version);
        leading.parse().ok()
    }
}

View file

@ -1,97 +1,81 @@
use crate::error::LogError;
use crate::logfile::LogFile;
use crate::logline::LogLine;
use crate::matcher::Matcher;
use clap::Parser;
use cloud_log_analyser_data::get_statements;
use serde::Deserialize;
use std::borrow::Cow;
use cloud_log_analyser_data::{get_statements, MAX_VERSION};
use main_error::MainResult;
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::iter::once;
use std::ops::AddAssign;
mod error;
mod logfile;
mod logline;
mod matcher;
#[derive(Debug, Parser)]
enum Args {
Log(LogCommand),
File(FileCommand),
}
#[derive(Debug, Parser)]
struct LogCommand {
line: String,
}
#[derive(Debug, Parser)]
struct FileCommand {
struct Args {
file: String,
}
#[derive(Deserialize)]
struct LogLine<'a> {
version: &'a str,
level: i64,
message: Cow<'a, str>,
}
fn main() {
fn main() -> MainResult {
let args = Args::parse();
match args {
Args::Log(LogCommand { line }) => {
let parsed_line: LogLine = serde_json::from_str(&line).unwrap();
let major = parsed_line.version.split(".").next().unwrap();
let major = major.parse().unwrap();
let statements = get_statements("server", major);
let matcher = Matcher::new(statements);
let index = matcher.match_log(parsed_line.level.into(), parsed_line.message.as_ref());
if let Some(index) = index {
let statement = &statements[index];
println!("match found: {} line {}", statement.path, statement.line);
} else {
eprintln!("No match found");
}
}
Args::File(FileCommand { file }) => {
let file = BufReader::new(File::open(file).unwrap());
let mut counts: HashMap<usize, usize> = HashMap::default();
let mut lines = file.lines().flatten();
let first = lines.next().unwrap();
let first_parsed: LogLine = serde_json::from_str(&first).unwrap();
let mut log_file = LogFile::open(&args.file).map_err(|err| LogError::Read {
err,
path: args.file,
})?;
let mut lines = log_file.iter();
let major = first_parsed.version.split(".").next().unwrap();
let major = major.parse().unwrap();
let statements = get_statements("server", major);
let matcher = Matcher::new(statements);
let mut counts: HashMap<usize, usize> = HashMap::new();
let first = lines.next().unwrap();
let first_parsed: LogLine = serde_json::from_str(&first).unwrap();
let lines = once(first).chain(lines);
let mut error_count = 0;
for line in lines {
if line.starts_with('{') {
let parsed = match serde_json::from_str::<LogLine>(&line) {
Ok(parsed) => parsed,
Err(_) => {
error_count += 1;
continue;
}
};
if let Some(index) =
matcher.match_log(parsed.level.into(), parsed.message.as_ref())
{
counts.entry(index).or_default().add_assign(1);
}
let statements = get_statements(
"server",
first_parsed.major_version().unwrap_or(MAX_VERSION),
);
let matcher = Matcher::new(statements);
let lines = once(first).chain(lines);
let mut error_count = 0;
let mut unmatched = 0;
for line in lines {
if line.starts_with('{') {
let parsed = match serde_json::from_str::<LogLine>(&line) {
Ok(parsed) => parsed,
Err(_) => {
error_count += 1;
continue;
}
}
let mut counts: Vec<(_, _)> = counts.into_iter().collect();
counts.sort_by_key(|(_, count)| *count);
counts.reverse();
for (index, count) in counts {
let statement = &statements[index];
println!("{} line {}: {}", statement.path, statement.line, count);
}
if error_count > 0 {
eprintln!("{error_count} lines failed to parse as valid log json");
};
if let Some(index) = matcher.match_log(parsed.level.into(), parsed.message.as_ref()) {
counts.entry(index).or_default().add_assign(1);
} else {
unmatched += 1;
}
}
}
let mut counts: Vec<(_, _)> = counts.into_iter().collect();
counts.sort_by_key(|(_, count)| *count);
counts.reverse();
for (index, count) in counts {
let statement = &statements[index];
println!(
"{}: {} line {}: {}",
statement.message(),
statement.path,
statement.line,
count
);
}
if unmatched > 0 {
eprintln!("{unmatched} lines couldn't be matched");
}
if error_count > 0 {
eprintln!("{error_count} lines failed to parse as valid log json");
}
Ok(())
}

View file

@ -5,6 +5,8 @@ pub struct LogMatch {
level: LogLevel,
pattern: Regex,
pattern_length: usize,
has_meaningful_message: bool,
exception: Option<&'static str>,
}
impl LogMatch {
@ -13,6 +15,8 @@ impl LogMatch {
level: statement.level,
pattern: Regex::new(statement.regex).unwrap(),
pattern_length: statement.regex.len(),
has_meaningful_message: statement.has_meaningful_message,
exception: statement.exception,
}
}
}
@ -33,14 +37,16 @@ impl Matcher {
let mut best_length = 0;
for (i, log_match) in self.matches.iter().enumerate() {
if (log_match.level == level
|| log_match.level == LogLevel::Exception
|| level == LogLevel::Unknown)
&& log_match.pattern.is_match(message)
&& log_match.pattern_length > best_length
{
best_match = Some(i);
best_length = log_match.pattern_length;
if log_match.has_meaningful_message {
if (log_match.level == level
|| log_match.level == LogLevel::Exception
|| level == LogLevel::Unknown)
&& log_match.pattern.is_match(message)
&& log_match.pattern_length > best_length
{
best_match = Some(i);
best_length = log_match.pattern_length;
}
}
}