mirror of
https://codeberg.org/icewind/logsmash.git
synced 2026-06-03 10:04:12 +02:00
printf placeholder extraction
This commit is contained in:
parent
b24297dfd5
commit
e992588923
8 changed files with 341 additions and 234 deletions
10
logging-extractor/Cargo.lock
generated
10
logging-extractor/Cargo.lock
generated
|
|
@ -120,6 +120,7 @@ dependencies = [
|
|||
"regex-syntax",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sprintf",
|
||||
"test-case",
|
||||
"thiserror",
|
||||
"tracing",
|
||||
|
|
@ -283,6 +284,15 @@ version = "1.13.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
|
||||
|
||||
[[package]]
|
||||
name = "sprintf"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "39b60413e681681e22dfe3966674082c075c5f6ed73c3900b95dd19eb9e0181d"
|
||||
dependencies = [
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.71"
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ memchr = "2.7.4"
|
|||
databake = { version = "0.1.8", features = ["derive"] }
|
||||
regex-syntax = "0.8.4"
|
||||
regex = "1.10.5"
|
||||
sprintf = "0.3.1"
|
||||
|
||||
[build-dependencies]
|
||||
cc = "1.1.6"
|
||||
|
|
|
|||
|
|
@ -245,6 +245,7 @@ fn test_extract_logging() {
|
|||
throw new SomeException();
|
||||
throw new \SomeException();
|
||||
$this->logger->error("foo {bar} {asd}");
|
||||
$this->logger->error($this->l10n->t("translated %s", $foo));
|
||||
}
|
||||
?>
|
||||
"#;
|
||||
|
|
@ -367,4 +368,18 @@ fn test_extract_logging() {
|
|||
]
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
logs[9],
|
||||
LoggingStatement {
|
||||
path: "foo.php",
|
||||
line: 16,
|
||||
level: LogLevel::Error,
|
||||
has_meaningful_message: true,
|
||||
exception: None,
|
||||
message_parts: vec![
|
||||
MessagePart::Literal("translated ".into()),
|
||||
MessagePart::PlaceHolder("$foo".into()),
|
||||
]
|
||||
}
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
use crate::string::{unescape, DoubleQuoteString, SingleQuoteString};
|
||||
use crate::MessagePart;
|
||||
use regex::Regex;
|
||||
use tree_sitter::Node;
|
||||
use sprintf::parser::{parse_format_string, FormatElement};
|
||||
use tree_sitter::{Node, TreeCursor};
|
||||
|
||||
pub struct MessageBuilder {
|
||||
pub parts: Vec<MessagePart>,
|
||||
|
|
@ -46,46 +47,45 @@ impl MessageBuilder {
|
|||
Self::push_placeholder_inner(&mut self.parts, placeholder);
|
||||
}
|
||||
|
||||
pub fn push_printf<'a, Args: Iterator<Item = &'a str>>(
|
||||
&mut self,
|
||||
string: &str,
|
||||
placeholders: &mut Args,
|
||||
) {
|
||||
if let Ok(format_elements) = parse_format_string(string) {
|
||||
for element in format_elements {
|
||||
match element {
|
||||
FormatElement::Verbatim(str) => Self::push_literal_inner(&mut self.parts, &str),
|
||||
FormatElement::Format(_) => Self::push_placeholder_inner(
|
||||
&mut self.parts,
|
||||
placeholders.next().unwrap_or_default(),
|
||||
),
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Self::push_placeholder_inner(&mut self.parts, string);
|
||||
}
|
||||
}
|
||||
|
||||
fn push_placeholder_inner(parts: &mut Vec<MessagePart>, placeholder: &str) {
|
||||
let placeholder = placeholder.replace(['\n', '\r', '\t'], "");
|
||||
parts
|
||||
.push(MessagePart::PlaceHolder(placeholder));
|
||||
parts.push(MessagePart::PlaceHolder(placeholder));
|
||||
}
|
||||
|
||||
fn extend<I: Iterator<Item = MessagePart>>(&mut self, parts: I) {
|
||||
for part in parts {
|
||||
match part {
|
||||
MessagePart::Literal(lit) => self.push_literal(&lit),
|
||||
MessagePart::PlaceHolder(placeholder) => self.push_placeholder(&placeholder),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn push_node(&mut self, node: Node, code: &str) {
|
||||
let mut cursor = node.walk();
|
||||
match node.grammar_name() {
|
||||
"string" | "encapsed_string" => {
|
||||
let mut argument_string_parts = node.children(&mut cursor);
|
||||
let is_double_quote = argument_string_parts
|
||||
.next()
|
||||
.map(|child| child.grammar_name())
|
||||
.unwrap_or_default()
|
||||
== r#"""#;
|
||||
|
||||
for string_part in argument_string_parts {
|
||||
match string_part.grammar_name() {
|
||||
"string_content" => {
|
||||
let content = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
self.push_literal(content);
|
||||
}
|
||||
"escape_sequence" => {
|
||||
let raw = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
let content = if is_double_quote {
|
||||
unescape::<DoubleQuoteString>(raw)
|
||||
} else {
|
||||
unescape::<SingleQuoteString>(raw)
|
||||
}
|
||||
.unwrap();
|
||||
self.push_literal(&content);
|
||||
}
|
||||
r#"'"# | r#"""# | r#"{"# | r#"}"# => {}
|
||||
_ => {
|
||||
let placeholder = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
self.push_placeholder(placeholder);
|
||||
}
|
||||
}
|
||||
}
|
||||
self.extend(string_parts(node, code, &mut cursor).into_iter());
|
||||
}
|
||||
"binary_expression" => {
|
||||
let start = node.named_child(0).unwrap().range().end_byte;
|
||||
|
|
@ -97,6 +97,36 @@ impl MessageBuilder {
|
|||
}
|
||||
}
|
||||
}
|
||||
"member_call_expression" => {
|
||||
match node
|
||||
.child_by_field_name("name")
|
||||
.and_then(|name| name.utf8_text(code.as_bytes()).ok())
|
||||
{
|
||||
Some("t") => {
|
||||
let arguments =
|
||||
node.child_by_field_name("arguments").expect("no arguments");
|
||||
let mut arguments = arguments.children(&mut cursor).skip(1); // opening bracket
|
||||
let mut cursor = node.walk();
|
||||
let fmt = string_parts(arguments.next().unwrap().child(0).unwrap(), code, &mut cursor);
|
||||
let mut arguments = arguments.filter_map(|arg| {
|
||||
(arg.grammar_name() != ",")
|
||||
.then(|| arg.utf8_text(code.as_bytes()).unwrap())
|
||||
});
|
||||
for part in fmt {
|
||||
match part {
|
||||
MessagePart::Literal(lit) => self.push_printf(&lit, &mut arguments),
|
||||
MessagePart::PlaceHolder(placeholder) => {
|
||||
self.push_placeholder(&placeholder)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let placeholder = node.utf8_text(code.as_bytes()).unwrap();
|
||||
self.push_placeholder(placeholder);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
let placeholder = node.utf8_text(code.as_bytes()).unwrap();
|
||||
self.push_placeholder(placeholder);
|
||||
|
|
@ -115,3 +145,54 @@ impl From<MessageBuilder> for Vec<MessagePart> {
|
|||
value.parts
|
||||
}
|
||||
}
|
||||
|
||||
fn string_parts<'cursor, 'node: 'cursor>(
|
||||
node: Node<'node>,
|
||||
code: &str,
|
||||
cursor: &mut TreeCursor<'cursor>,
|
||||
) -> Vec<MessagePart> {
|
||||
let mut argument_string_parts = node.children(cursor);
|
||||
let is_double_quote = argument_string_parts
|
||||
.next()
|
||||
.map(|child| child.grammar_name())
|
||||
.unwrap_or_default()
|
||||
== r#"""#;
|
||||
|
||||
argument_string_parts
|
||||
.filter_map(move |string_part| match string_part.grammar_name() {
|
||||
"string_content" => {
|
||||
let content = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
Some(MessagePart::Literal(content.into()))
|
||||
}
|
||||
"escape_sequence" => {
|
||||
let raw = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
let content = if is_double_quote {
|
||||
unescape::<DoubleQuoteString>(raw)
|
||||
} else {
|
||||
unescape::<SingleQuoteString>(raw)
|
||||
}
|
||||
.unwrap();
|
||||
Some(MessagePart::Literal(content.into()))
|
||||
}
|
||||
r#"'"# | r#"""# | r#"{"# | r#"}"# => None,
|
||||
_ => {
|
||||
let placeholder = string_part.utf8_text(code.as_bytes()).unwrap();
|
||||
Some(MessagePart::PlaceHolder(placeholder.into()))
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[test]
fn test_printf() {
    // One `%s` specifier surrounded by verbatim text on both sides.
    let expected = vec![
        MessagePart::Literal("test ".into()),
        MessagePart::PlaceHolder("$name".into()),
        MessagePart::Literal(" foo".into()),
    ];

    let mut builder = MessageBuilder::with_capacity(4);
    let mut args = ["$name"].into_iter();
    builder.push_printf("test %s foo", &mut args);

    assert_eq!(expected, builder.parts);
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue