extract more sprintf style pattern

This commit is contained in:
Robin Appelman 2025-06-17 23:32:54 +02:00
commit c3ebe018cd
4 changed files with 44 additions and 14 deletions

View file

@ -117,7 +117,6 @@ dependencies = [
"maplit", "maplit",
"memchr", "memchr",
"regex", "regex",
"regex-syntax",
"serde", "serde",
"serde_json", "serde_json",
"sprintf", "sprintf",

View file

@ -20,7 +20,6 @@ tree-sitter = "0.22.6"
tree-sitter-php = "0.22.7" tree-sitter-php = "0.22.7"
memchr = "2.7.4" memchr = "2.7.4"
databake = { version = "0.1.8", features = ["derive"] } databake = { version = "0.1.8", features = ["derive"] }
regex-syntax = "0.8.4"
regex = "1.10.5" regex = "1.10.5"
sprintf = "0.3.1" sprintf = "0.3.1"

View file

@ -115,7 +115,7 @@ impl LogExtractor {
cursor: &'a mut QueryCursor, cursor: &'a mut QueryCursor,
code: &'a str, code: &'a str,
node: Node<'a>, node: Node<'a>,
) -> impl Iterator<Item = LogCall> + 'a { ) -> impl Iterator<Item = LogCall<'a>> + 'a {
let method_calls = cursor.matches(&self.method_query, node, code.as_bytes()); let method_calls = cursor.matches(&self.method_query, node, code.as_bytes());
method_calls.filter_map(|method_call| { method_calls.filter_map(|method_call| {
@ -141,7 +141,7 @@ impl LogExtractor {
cursor: &'a mut QueryCursor, cursor: &'a mut QueryCursor,
code: &'a str, code: &'a str,
node: Node<'a>, node: Node<'a>,
) -> impl Iterator<Item = LogCall> + 'a { ) -> impl Iterator<Item = LogCall<'a>> + 'a {
let throws = cursor.matches(&self.throw_query, node, code.as_bytes()); let throws = cursor.matches(&self.throw_query, node, code.as_bytes());
throws.map(|method_call| { throws.map(|method_call| {
@ -281,6 +281,7 @@ fn test_extract_logging() {
throw new InvalidArgumentException(sprintf('Argument "%s" not found.', $key)); throw new InvalidArgumentException(sprintf('Argument "%s" not found.', $key));
$baseMsg = 'Could not resolve ' . $name . '!'; $baseMsg = 'Could not resolve ' . $name . '!';
throw new QueryNotFoundException($baseMsg . ' ' . $e->getMessage()); throw new QueryNotFoundException($baseMsg . ' ' . $e->getMessage());
$this->log("foo %s");
} }
?> ?>
"#; "#;
@ -448,4 +449,18 @@ fn test_extract_logging() {
] ]
} }
); );
assert_eq!(
logs[12],
LoggingStatement {
path: "foo.php",
line: 20,
level: LogLevel::Unknown,
has_meaningful_message: true,
exception: None,
message_parts: vec![
MessagePart::Literal("foo ".into()),
MessagePart::PlaceHolder("".into()),
]
}
);
} }

View file

@ -19,24 +19,32 @@ impl MessageBuilder {
} }
pub fn push_literal(&mut self, content: &str) { pub fn push_literal(&mut self, content: &str) {
if self.placeholder_regex.is_match(content) { Self::push_literal_inner(&self.placeholder_regex, &mut self.parts, content)
}
pub fn push_literal_inner(
placeholder_regex: &Regex,
parts: &mut Vec<MessagePart>,
content: &str,
) {
if placeholder_regex.is_match(content) {
let mut start = 0; let mut start = 0;
for placeholder in self.placeholder_regex.find_iter(content) { for placeholder in placeholder_regex.find_iter(content) {
if placeholder.start() > start { if placeholder.start() > start {
Self::push_literal_inner(&mut self.parts, &content[start..placeholder.start()]); Self::push_bare_literal_inner(parts, &content[start..placeholder.start()]);
Self::push_placeholder_inner(&mut self.parts, placeholder.as_str()); Self::push_placeholder_inner(parts, placeholder.as_str());
} }
start = placeholder.end(); start = placeholder.end();
} }
if start < content.len() { if start < content.len() {
Self::push_literal_inner(&mut self.parts, &content[start..]) Self::push_bare_literal_inner(parts, &content[start..])
} }
} else { } else {
Self::push_literal_inner(&mut self.parts, content) Self::push_bare_literal_inner(parts, content)
} }
} }
fn push_literal_inner(parts: &mut Vec<MessagePart>, content: &str) { fn push_bare_literal_inner(parts: &mut Vec<MessagePart>, content: &str) {
if let Some(MessagePart::Literal(last_part)) = parts.last_mut() { if let Some(MessagePart::Literal(last_part)) = parts.last_mut() {
last_part.push_str(content); last_part.push_str(content);
} else { } else {
@ -56,7 +64,9 @@ impl MessageBuilder {
if let Ok(format_elements) = parse_format_string(string) { if let Ok(format_elements) = parse_format_string(string) {
for element in format_elements { for element in format_elements {
match element { match element {
FormatElement::Verbatim(str) => Self::push_literal_inner(&mut self.parts, &str), FormatElement::Verbatim(str) => {
Self::push_literal_inner(&self.placeholder_regex, &mut self.parts, &str)
}
FormatElement::Format(_) => Self::push_placeholder_inner( FormatElement::Format(_) => Self::push_placeholder_inner(
&mut self.parts, &mut self.parts,
placeholders.next().unwrap_or_default(), placeholders.next().unwrap_or_default(),
@ -73,7 +83,7 @@ impl MessageBuilder {
parts.push(MessagePart::PlaceHolder(placeholder)); parts.push(MessagePart::PlaceHolder(placeholder));
} }
fn extend<I: Iterator<Item = MessagePart>>(&mut self, parts: I) { fn extend<I: IntoIterator<Item = MessagePart>>(&mut self, parts: I) {
for part in parts { for part in parts {
match part { match part {
MessagePart::Literal(lit) => self.push_literal(&lit), MessagePart::Literal(lit) => self.push_literal(&lit),
@ -86,7 +96,14 @@ impl MessageBuilder {
let mut cursor = node.walk(); let mut cursor = node.walk();
match node.grammar_name() { match node.grammar_name() {
"string" | "encapsed_string" => { "string" | "encapsed_string" => {
self.extend(string_parts(node, code, &mut cursor).into_iter()); for part in string_parts(node, code, &mut cursor) {
match part {
MessagePart::Literal(lit) => self.push_printf(&lit, &mut [].into_iter()),
MessagePart::PlaceHolder(placeholder) => {
self.push_placeholder(&placeholder)
}
}
}
} }
"binary_expression" => { "binary_expression" => {
let start = node.named_child(0).unwrap().range().end_byte; let start = node.named_child(0).unwrap().range().end_byte;