initial log extraction logic

This commit is contained in:
Robin Appelman 2024-07-20 16:13:58 +02:00
commit 572582517c
18 changed files with 2827 additions and 0 deletions

1
.envrc Normal file
View file

@ -0,0 +1 @@
use flake

5
.gitignore vendored Normal file
View file

@ -0,0 +1,5 @@
target
/data
.direnv
.env
result

7
Cargo.lock generated Normal file
View file

@ -0,0 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "cloud-log-analyser"
version = "0.1.0"

7
Cargo.toml Normal file
View file

@ -0,0 +1,7 @@
[package]
name = "cloud-log-analyser"
version = "0.1.0"
edition = "2021"
rust-version = "1.66.0"
[dependencies]

126
flake.lock generated Normal file
View file

@ -0,0 +1,126 @@
{
"nodes": {
"cross-naersk": {
"inputs": {
"naersk": [
"naersk"
],
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1717704286,
"narHash": "sha256-zrLB/FTKODEAlJjgO8TwbK7teTseYbjLESp8QJ/FJYc=",
"owner": "icewind1991",
"repo": "cross-naersk",
"rev": "9068daceb8f0d248dcf629944f60e92b81391bdb",
"type": "github"
},
"original": {
"owner": "icewind1991",
"repo": "cross-naersk",
"type": "github"
}
},
"flake-utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1710146030,
"narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"naersk": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1718727675,
"narHash": "sha256-uFsCwWYI2pUpt0awahSBorDUrUfBhaAiyz+BPTS2MHk=",
"owner": "nix-community",
"repo": "naersk",
"rev": "941ce6dc38762a7cfb90b5add223d584feed299b",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "naersk",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1720535198,
"narHash": "sha256-zwVvxrdIzralnSbcpghA92tWu2DV2lwv89xZc8MTrbg=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "205fd4226592cc83fd4c0885a3e4c9c400efabb5",
"type": "github"
},
"original": {
"id": "nixpkgs",
"ref": "nixos-23.11",
"type": "indirect"
}
},
"root": {
"inputs": {
"cross-naersk": "cross-naersk",
"flake-utils": "flake-utils",
"naersk": "naersk",
"nixpkgs": "nixpkgs",
"rust-overlay": "rust-overlay"
}
},
"rust-overlay": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1721441897,
"narHash": "sha256-gYGX9/22tPNeF7dR6bWN5rsrpU4d06GnQNNgZ6ZiXz0=",
"owner": "oxalica",
"repo": "rust-overlay",
"rev": "b7996075da11a2d441cfbf4e77c2939ce51506fd",
"type": "github"
},
"original": {
"owner": "oxalica",
"repo": "rust-overlay",
"type": "github"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

105
flake.nix Normal file
View file

@ -0,0 +1,105 @@
# Nix flake for the `cloud-log-analyser` crate: builds it with naersk / cross-naersk
# for a list of targets, exposes check/clippy/msrv builds and a development shell.
{
inputs = {
nixpkgs.url = "nixpkgs/nixos-23.11";
flake-utils.url = "github:numtide/flake-utils";
naersk.url = "github:nix-community/naersk";
naersk.inputs.nixpkgs.follows = "nixpkgs";
rust-overlay.url = "github:oxalica/rust-overlay";
rust-overlay.inputs.nixpkgs.follows = "nixpkgs";
rust-overlay.inputs.flake-utils.follows = "flake-utils";
cross-naersk.url = "github:icewind1991/cross-naersk";
cross-naersk.inputs.nixpkgs.follows = "nixpkgs";
cross-naersk.inputs.naersk.follows = "naersk";
};
outputs = {
self,
nixpkgs,
flake-utils,
naersk,
rust-overlay,
cross-naersk,
}:
flake-utils.lib.eachDefaultSystem (
system: let
# Overlays applied to nixpkgs: the Rust toolchains plus this repository's own overlay.
overlays = [
(import rust-overlay)
(import ./overlay.nix)
];
pkgs = (import nixpkgs) {
inherit system overlays;
};
inherit (pkgs) lib callPackage rust-bin mkShell;
inherit (lib.sources) sourceByRegex;
inherit (builtins) fromTOML readFile map;
# Minimum supported Rust version, read from `package.rust-version` in ./Cargo.toml.
msrv = (fromTOML (readFile ./Cargo.toml)).package.rust-version;
toolchain = rust-bin.stable.latest.default;
msrvToolchain = rust-bin.stable."${msrv}".default;
# naersk instance using the latest stable toolchain.
naersk' = callPackage naersk {
rustc = toolchain;
cargo = toolchain;
};
# naersk instance pinned to the MSRV toolchain.
msrvNaersk = callPackage naersk {
rustc = msrvToolchain;
cargo = msrvToolchain;
};
cross-naersk' = pkgs.callPackage cross-naersk {inherit naersk;};
# Turns a list of targets into `{ include = [ { target, artifactSuffix } ... ]; }`.
# NOTE(review): presumably consumed as a CI build matrix — confirm against the CI config.
buildMatrix = targets: {
include =
map (target: {
inherit target;
artifactSuffix = cross-naersk'.execSufficForTarget target;
})
targets;
};
hostTarget = pkgs.hostPlatform.config;
targets = [
"x86_64-unknown-linux-musl"
"x86_64-pc-windows-gnu"
hostTarget
];
# All targets except the host platform itself.
releaseTargets = lib.lists.remove hostTarget targets;
# Only paths matching `Cargo.*` and `src/` are part of the build source.
# NOTE(review): `logging-extractor/` is not matched by these patterns — confirm that is intended.
src = sourceByRegex ./. ["Cargo.*" "(src)(/.*)?"];
# Options shared by every naersk build below.
nearskOpt = {
pname = "cloud-log-analyser";
root = src;
};
in rec {
# One package per target, plus the check/clippy/msrv variants.
packages =
lib.attrsets.genAttrs targets (target:
(cross-naersk'.buildPackage target) nearskOpt)
// {
# NOTE(review): `shelve` comes from ./overlay.nix and does not match the
# `cloud-log-analyser` pname used above — looks like a template leftover; confirm.
shelve = pkgs.shelve;
check = naersk'.buildPackage (nearskOpt
// {
mode = "check";
});
clippy = naersk'.buildPackage (nearskOpt
// {
mode = "clippy";
});
msrv = msrvNaersk.buildPackage (nearskOpt
// {
mode = "check";
});
default = pkgs.shelve;
};
apps.default = packages.default;
matrix = buildMatrix targets;
releaseMatrix = buildMatrix releaseTargets;
# Development shell with the stable toolchain and helper tools.
devShells.default = mkShell {
nativeBuildInputs = with pkgs; [toolchain bacon cargo-msrv cargo-insta];
};
}
)
// {
overlays.default = import ./overlay.nix;
};
}

501
logging-extractor/Cargo.lock generated Normal file
View file

@ -0,0 +1,501 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "anstream"
version = "0.6.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b"
[[package]]
name = "anstyle-parse"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad186efb764318d35165f1758e7dcef3b10628e26d41a44bc5550652e6804391"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "cc"
version = "1.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2aba8f4e9906c7ce3c73463f62a7f0c65183ada1a2d47e397cc8810827f9694f"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "4.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "64acc1846d54c1fe936a78dc189c34e28d3f5afc348403f28ecf53660b9b8462"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6fb8393d67ba2e7bfaf28a23458e4e2b543cc73a99595511eb207fdb8aede942"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.5.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2bac35c6dafb060fd4d275d9a4ffae97917c13a6327903a8be2153cd964f7085"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "clap_lex"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4b82cf0babdbd58558212896d1a4272303a57bdb245c2bf1147185fb45640e70"
[[package]]
name = "colorchoice"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422"
[[package]]
name = "console"
version = "0.15.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb"
dependencies = [
"encode_unicode",
"lazy_static",
"libc",
"windows-sys",
]
[[package]]
name = "encode_unicode"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "insta"
version = "1.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "810ae6042d48e2c9e9215043563a58a80b877bc863228a74cf10c49d4620a6f5"
dependencies = [
"console",
"lazy_static",
"linked-hash-map",
"serde",
"similar",
]
[[package]]
name = "is_terminal_polyfill"
version = "1.70.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800"
[[package]]
name = "itoa"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b"
[[package]]
name = "lazy_static"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
[[package]]
name = "libc"
version = "0.2.155"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
[[package]]
name = "linked-hash-map"
version = "0.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f"
[[package]]
name = "logging-extractor"
version = "0.1.0"
dependencies = [
"cc",
"clap",
"insta",
"serde",
"serde_json",
"test-case",
"thiserror",
"tree-sitter",
"tree-sitter-php",
"walkdir",
]
[[package]]
name = "memchr"
version = "2.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
[[package]]
name = "proc-macro2"
version = "1.0.86"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
dependencies = [
"proc-macro2",
]
[[package]]
name = "regex"
version = "1.10.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
]
[[package]]
name = "regex-automata"
version = "0.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38caf58cc5ef2fed281f89292ef23f6365465ed9a41b7a7754eb4e26496c92df"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b"
[[package]]
name = "ryu"
version = "1.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "serde"
version = "1.0.204"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.204"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.120"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5"
dependencies = [
"itoa",
"ryu",
"serde",
]
[[package]]
name = "similar"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa42c91313f1d05da9b26f267f931cf178d4aba455b4c4622dd7355eb80c6640"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "2.0.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "test-case"
version = "3.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eb2550dd13afcd286853192af8601920d959b14c401fcece38071d53bf0768a8"
dependencies = [
"test-case-macros",
]
[[package]]
name = "test-case-core"
version = "3.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adcb7fd841cd518e279be3d5a3eb0636409487998a4aff22f3de87b81e88384f"
dependencies = [
"cfg-if",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "test-case-macros"
version = "3.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c89e72a01ed4c579669add59014b9a524d609c0c88c6a585ce37485879f6ffb"
dependencies = [
"proc-macro2",
"quote",
"syn",
"test-case-core",
]
[[package]]
name = "thiserror"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tree-sitter"
version = "0.22.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df7cc499ceadd4dcdf7ec6d4cbc34ece92c3fa07821e287aedecd4416c516dca"
dependencies = [
"cc",
"regex",
]
[[package]]
name = "tree-sitter-php"
version = "0.22.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "91010beb390d81a5d720c2d2d87b113892ac38ce3ebc99270183973a066f72e4"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]]
name = "winapi-util"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b"
dependencies = [
"windows-sys",
]
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_gnullvm",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
[[package]]
name = "windows_i686_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
[[package]]
name = "windows_i686_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
[[package]]
name = "windows_i686_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"

View file

@ -0,0 +1,26 @@
[package]
name = "logging-extractor"
version = "0.1.0"
edition = "2021"
rust-version = "1.66.0"
[lib]
[[bin]]
name = "logging-extractor"
[dependencies]
thiserror = "1.0.63"
clap = { version = "4.5.9", features = ["derive"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = "1.0.120"
walkdir = "2.5.0"
tree-sitter = "0.22.6"
tree-sitter-php = "0.22.7"
[build-dependencies]
cc = "1.1.6"
[dev-dependencies]
test-case = "3.3.1"
insta = { version = "1.39.0", features = ["json"] }

View file

@ -0,0 +1,12 @@
use std::path::PathBuf;
use thiserror::Error;
#[derive(Debug, Error)]
pub enum Error {
#[error("Failed to determine absolute root path ({}: {err:#}", path.display())]
RealPath { path: PathBuf, err: std::io::Error },
#[error("Failed to open source file ({}: {err:#}", path.display())]
Open { path: PathBuf, err: std::io::Error },
#[error("Failed to read source file ({}: {err:#}", path.display())]
Read { path: PathBuf, err: std::io::Error },
}

View file

@ -0,0 +1,141 @@
use crate::{LogLevel, LoggingStatement};
use tree_sitter::{Language, Node, Parser, Query, QueryCursor};
/// Finds logging calls in PHP source code using tree-sitter queries.
pub struct LogExtractor {
// PHP grammar handed to every parser created by `extract`.
language: Language,
// Query matching member call expressions, capturing the method name and its arguments.
method_query: Query,
// Query matching `string_content` nodes, used to collect the literal parts of a message.
string_query: Query,
}
impl LogExtractor {
    /// Creates an extractor for PHP code with its two queries pre-compiled.
    ///
    /// # Panics
    ///
    /// Panics with "invalid query" if either built-in query is rejected by
    /// the grammar.
    pub fn new() -> Self {
        let language = tree_sitter_php::language_php();
        let compile = |source: &str| Query::new(&language, source).expect("invalid query");
        let method_query = compile(
            r#"(
member_call_expression
name: (name)@name
arguments: (arguments ((argument)+ @args))
)"#,
        );
        let string_query = compile(r#"(string_content)@string"#);
        Self {
            language,
            method_query,
            string_query,
        }
    }

    /// Parses `code` and yields one [`LoggingStatement`] per logging call found.
    ///
    /// `path` is copied into every statement unchanged. Reported line numbers
    /// are one-based. For each call, the message parts are the `string_content`
    /// nodes found below the captured argument node.
    ///
    /// # Panics
    ///
    /// Panics if the PHP grammar cannot be loaded into the parser, or if
    /// parsing yields no tree (a ten second parse timeout is configured).
    pub fn extract<'a>(
        &self,
        path: &'a str,
        code: &'a str,
    ) -> impl Iterator<Item = LoggingStatement<'a>> + 'a {
        let source = code.as_bytes();

        let mut parser = Parser::new();
        parser
            .set_language(&self.language)
            .expect("Error loading PHP grammar");
        parser.set_timeout_micros(10 * 1000 * 1000);
        let tree = parser.parse(code, None).expect("parse timeout or canceled");

        // The tree and the cursors are locals, so the statements are gathered
        // eagerly and handed back as an owning iterator.
        let mut call_cursor = QueryCursor::new();
        let mut statements = Vec::new();
        for call in self.get_log_calls(&mut call_cursor, code, tree.root_node()) {
            let mut string_cursor = QueryCursor::new();
            let mut message_parts = Vec::new();
            for found in string_cursor.matches(&self.string_query, call.arguments, source) {
                let text = found.captures[0]
                    .node
                    .utf8_text(source)
                    .unwrap_or("malformed utf8");
                message_parts.push(text);
            }
            statements.push(LoggingStatement {
                level: call.level,
                line: call.line + 1,
                path,
                message_parts,
            });
        }
        statements.into_iter()
    }

    /// Runs the method-call query below `node` and keeps only the calls whose
    /// method name [`LogLevel::parse`] recognises.
    ///
    /// The reported `line` is the zero-based row of the method name node.
    fn get_log_calls<'a>(
        &'a self,
        cursor: &'a mut QueryCursor,
        code: &'a str,
        node: Node<'a>,
    ) -> impl Iterator<Item = LogCall> + 'a {
        let source = code.as_bytes();
        cursor
            .matches(&self.method_query, node, source)
            .filter_map(move |found| {
                let name_node = found.captures[0].node;
                let method = name_node.utf8_text(source).unwrap_or("malformed utf8");
                // The argument capture is only looked at for recognised method names.
                LogLevel::parse(method).map(|level| LogCall {
                    level,
                    line: name_node.start_position().row,
                    arguments: found.captures[1].node,
                })
            })
    }
}
impl Default for LogExtractor {
fn default() -> Self {
Self::new()
}
}
/// A method call whose name was recognised as a log level.
struct LogCall<'tree> {
// Level derived from the called method's name.
level: LogLevel,
// Zero-based row of the method name within the parsed source.
line: usize,
// Argument node captured by the method query; searched for string contents.
arguments: Node<'tree>,
}
/// Checks that both logging calls in a small PHP snippet are extracted with
/// the right level, one-based line number and literal message parts.
#[test]
fn test_extract_logging() {
    let code = r#"<?php
function test() {
$this->logger->warning("failed to find trash item for $rootTrashedItemName deleted at $rootTrashedItemDate in folder $groupFolderId", ['app' => 'groupfolders']);
$logger->info("foobar");
}
?>
"#;

    // Interpolated variables split the first message into three literal parts.
    let expected_warning = LoggingStatement {
        path: "foo.php",
        line: 3,
        level: LogLevel::Warn,
        message_parts: vec![
            "failed to find trash item for ",
            " deleted at ",
            " in folder ",
        ],
    };
    let expected_info = LoggingStatement {
        path: "foo.php",
        line: 4,
        level: LogLevel::Info,
        message_parts: vec!["foobar"],
    };

    let logs: Vec<_> = LogExtractor::new().extract("foo.php", code).collect();

    assert_eq!(logs[0], expected_warning);
    assert_eq!(logs[1], expected_info);
}

View file

@ -0,0 +1,72 @@
use std::fmt::{Display, Formatter};
use serde::{Deserialize, Deserializer, Serialize, Serializer};
/// Severity of a logging statement.
///
/// The variants mirror the method names accepted by `LogLevel::parse`;
/// `Unknown` is the default and is used when no specific level applies.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum LogLevel {
    Debug,
    Info,
    Notice,
    Warn,
    Error,
    Alert,
    Critical,
    Emergency,
    /// No specific level could be determined.
    #[default]
    Unknown,
}
impl LogLevel {
pub fn parse(name: &str) -> Option<Self> {
match name {
"debug" => Some(LogLevel::Debug),
"info" => Some(LogLevel::Info),
"notice" => Some(LogLevel::Notice),
"warn" | "warning" => Some(LogLevel::Warn),
"error" => Some(LogLevel::Error),
"alert" => Some(LogLevel::Alert),
"critical" => Some(LogLevel::Critical),
"emergency" => Some(LogLevel::Emergency),
"log" => Some(LogLevel::Unknown),
_ => None,
}
}
pub fn as_str(&self) -> &'static str {
match self {
LogLevel::Debug => "debug",
LogLevel::Info => "info",
LogLevel::Notice => "notice",
LogLevel::Warn => "warn",
LogLevel::Error => "error",
LogLevel::Alert => "alert",
LogLevel::Critical => "critical",
LogLevel::Emergency => "emergency",
LogLevel::Unknown => "log",
}
}
}
impl Serialize for LogLevel {
    /// Serializes the level as the string returned by `as_str`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        serializer.serialize_str(self.as_str())
    }
}
impl Display for LogLevel {
    /// Writes the level's `as_str` name, honouring width and alignment flags.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.pad(self.as_str())
    }
}
impl<'de> Deserialize<'de> for LogLevel {
    /// Deserializes a level from its string name.
    ///
    /// Names that `LogLevel::parse` does not recognise become the default
    /// level rather than an error.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        // An owned string is requested on purpose: asking for `&str` only works
        // when the deserializer can lend a slice of its input, which fails for
        // reader-based input and for strings containing escapes.
        let name = String::deserialize(deserializer)?;
        Ok(LogLevel::parse(&name).unwrap_or_default())
    }
}

View file

@ -0,0 +1,62 @@
use crate::error::Error;
use crate::extractor::LogExtractor;
use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io::{Read, Write};
use walkdir::WalkDir;
pub mod error;
pub mod extractor;
mod level;
pub use level::LogLevel;
/// A single logging call found in a source file.
///
/// The string fields borrow from the caller's path and source text.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
pub struct LoggingStatement<'a> {
// Level derived from the called method's name.
level: LogLevel,
// Path of the file as passed to the extractor.
path: &'a str,
// One-based line number of the call.
line: usize,
// Literal string fragments of the message, in source order.
message_parts: Vec<&'a str>,
}
/// Walks `root` recursively and writes every logging statement found in
/// `.php` files to `output` as a JSON array.
///
/// Each statement's path has the `root` prefix removed. Directory entries
/// that cannot be read, paths that are not valid UTF-8 and directories whose
/// name ends in `.php` are skipped.
///
/// # Errors
///
/// Returns [`Error::Open`] or [`Error::Read`] when a `.php` file cannot be
/// opened or read. Failures while writing to `output` are not reported.
pub fn extract_dir<W: Write>(root: &str, mut output: W) -> Result<(), Error> {
    // One large buffer is reused for every file instead of allocating per file.
    let mut code_buff = String::with_capacity(32 * 1024 * 1024);
    // Writes are best effort: `Error` has no variant for output failures.
    writeln!(&mut output, "[").ok();
    let mut first_line = true;
    let extractor = LogExtractor::new();
    for file in WalkDir::new(root).into_iter().flatten() {
        let Some(path) = file.path().to_str() else {
            continue;
        };
        // A directory named `something.php` can be opened but not read, which
        // used to abort the whole run with a read error.
        if !path.ends_with(".php") || file.path().is_dir() {
            continue;
        }
        code_buff.clear();
        // Avoids slicing at a raw byte offset; walked paths start with `root`.
        let rel_path = path.strip_prefix(root).unwrap_or(path);
        let mut fh = File::open(path).map_err(|err| Error::Open {
            path: path.into(),
            err,
        })?;
        fh.read_to_string(&mut code_buff)
            .map_err(|err| Error::Read {
                path: path.into(),
                err,
            })?;
        for log_item in extractor.extract(rel_path, &code_buff) {
            // Items are separated by ",\n"; the first one gets no separator.
            if !first_line {
                writeln!(&mut output, ",").ok();
            }
            first_line = false;
            let _ = serde_json::to_writer(&mut output, &log_item);
        }
    }
    writeln!(&mut output, "\n]").ok();
    Ok(())
}

View file

@ -0,0 +1,24 @@
use clap::Parser;
use logging_extractor::error::Error;
use logging_extractor::extract_dir;
use std::fs::canonicalize;
use std::io::stdout;
use std::path::PathBuf;
// Command line arguments of the extractor binary.
// Plain `//` comments are used on purpose: clap turns `///` doc comments into help text.
#[derive(Parser, Debug)]
struct Args {
// Directory to scan; canonicalized in `main` before use.
root: PathBuf,
}
fn main() -> Result<(), Error> {
let args = Args::parse();
let root = canonicalize(&args.root).map_err(|err| Error::RealPath {
path: args.root,
err,
})?;
let root = root.to_str().expect("non utf8 root path");
let stdout = stdout();
extract_dir(root, stdout)
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,12 @@
use logging_extractor::{extract_dir, LoggingStatement};
use test_case::test_case;
// Runs the extractor over `test-data/<name>` and compares the result with the
// stored insta JSON snapshot.
#[test_case("DefaultShareProvider")]
fn snapshot_test(name: &str) {
let root = format!("test-data/{name}");
let mut output = Vec::<u8>::with_capacity(1024 * 1024);
extract_dir(&root, &mut output).unwrap();
// Parsing the output back also checks that the emitted JSON is well-formed.
let output: Vec<LoggingStatement> = serde_json::from_slice(&output).unwrap();
insta::assert_json_snapshot!(output)
}

View file

@ -0,0 +1,57 @@
---
source: tests/snapshot.rs
expression: output
---
[
{
"level": "error",
"path": "/DefaultShareProvider.php",
"line": 1171,
"message_parts": []
},
{
"level": "debug",
"path": "/DefaultShareProvider.php",
"line": 1392,
"message_parts": [
"Share notification not sent to ",
" because user could not be found."
]
},
{
"level": "debug",
"path": "/DefaultShareProvider.php",
"line": 1401,
"message_parts": [
"Share notification not sent to ",
" because email address is not set."
]
},
{
"level": "debug",
"path": "/DefaultShareProvider.php",
"line": 1416,
"message_parts": [
"Sent share notification to ",
" for share with ID ",
"."
]
},
{
"level": "error",
"path": "/DefaultShareProvider.php",
"line": 1420,
"message_parts": [
"Share notification mail could not be sent."
]
},
{
"level": "error",
"path": "/DefaultShareProvider.php",
"line": 1507,
"message_parts": [
"Share notification mail could not be sent to: ",
", "
]
}
]

3
overlay.nix Normal file
View file

@ -0,0 +1,3 @@
# Overlay adding a `shelve` package built from ./package.nix.
# NOTE(review): the name `shelve` does not match this repository's crate names — confirm it is intended.
final: prev: {
shelve = final.callPackage ./package.nix {};
}

3
src/main.rs Normal file
View file

@ -0,0 +1,3 @@
/// Placeholder entry point that prints a greeting to standard output.
fn main() {
    const GREETING: &str = "Hello, world!";
    println!("{GREETING}");
}