printf placeholder extraction

This commit is contained in:
Robin Appelman 2024-09-15 11:02:57 +02:00
commit e992588923
8 changed files with 341 additions and 234 deletions

View file

@ -120,6 +120,7 @@ dependencies = [
"regex-syntax",
"serde",
"serde_json",
"sprintf",
"test-case",
"thiserror",
"tracing",
@ -283,6 +284,15 @@ version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]]
name = "sprintf"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39b60413e681681e22dfe3966674082c075c5f6ed73c3900b95dd19eb9e0181d"
dependencies = [
"thiserror",
]
[[package]]
name = "syn"
version = "2.0.71"

View file

@ -22,6 +22,7 @@ memchr = "2.7.4"
databake = { version = "0.1.8", features = ["derive"] }
regex-syntax = "0.8.4"
regex = "1.10.5"
sprintf = "0.3.1"
[build-dependencies]
cc = "1.1.6"

View file

@ -245,6 +245,7 @@ fn test_extract_logging() {
throw new SomeException();
throw new \SomeException();
$this->logger->error("foo {bar} {asd}");
$this->logger->error($this->l10n->t("translated %s", $foo));
}
?>
"#;
@ -367,4 +368,18 @@ fn test_extract_logging() {
]
}
);
assert_eq!(
logs[9],
LoggingStatement {
path: "foo.php",
line: 16,
level: LogLevel::Error,
has_meaningful_message: true,
exception: None,
message_parts: vec![
MessagePart::Literal("translated ".into()),
MessagePart::PlaceHolder("$foo".into()),
]
}
);
}

View file

@ -1,7 +1,8 @@
use crate::string::{unescape, DoubleQuoteString, SingleQuoteString};
use crate::MessagePart;
use regex::Regex;
use tree_sitter::Node;
use sprintf::parser::{parse_format_string, FormatElement};
use tree_sitter::{Node, TreeCursor};
pub struct MessageBuilder {
pub parts: Vec<MessagePart>,
@ -46,46 +47,45 @@ impl MessageBuilder {
Self::push_placeholder_inner(&mut self.parts, placeholder);
}
pub fn push_printf<'a, Args: Iterator<Item = &'a str>>(
&mut self,
string: &str,
placeholders: &mut Args,
) {
if let Ok(format_elements) = parse_format_string(string) {
for element in format_elements {
match element {
FormatElement::Verbatim(str) => Self::push_literal_inner(&mut self.parts, &str),
FormatElement::Format(_) => Self::push_placeholder_inner(
&mut self.parts,
placeholders.next().unwrap_or_default(),
),
}
}
} else {
Self::push_placeholder_inner(&mut self.parts, string);
}
}
/// Appends `placeholder` to `parts` as a `MessagePart::PlaceHolder`.
///
/// Newline, carriage-return and tab characters are removed from the placeholder text
/// before it is stored.
fn push_placeholder_inner(parts: &mut Vec<MessagePart>, placeholder: &str) {
    let placeholder = placeholder.replace(['\n', '\r', '\t'], "");
    // Exactly one push: the placeholder string is moved into the new part.
    parts.push(MessagePart::PlaceHolder(placeholder));
}
/// Feeds every part of `parts` into the builder, in order.
///
/// Literals go through `push_literal` and placeholders through `push_placeholder`, so each
/// part is handled exactly as if it had been pushed individually.
fn extend<I: Iterator<Item = MessagePart>>(&mut self, parts: I) {
    parts.for_each(|part| match part {
        MessagePart::Literal(text) => self.push_literal(&text),
        MessagePart::PlaceHolder(name) => self.push_placeholder(&name),
    });
}
pub fn push_node(&mut self, node: Node, code: &str) {
let mut cursor = node.walk();
match node.grammar_name() {
"string" | "encapsed_string" => {
let mut argument_string_parts = node.children(&mut cursor);
let is_double_quote = argument_string_parts
.next()
.map(|child| child.grammar_name())
.unwrap_or_default()
== r#"""#;
for string_part in argument_string_parts {
match string_part.grammar_name() {
"string_content" => {
let content = string_part.utf8_text(code.as_bytes()).unwrap();
self.push_literal(content);
}
"escape_sequence" => {
let raw = string_part.utf8_text(code.as_bytes()).unwrap();
let content = if is_double_quote {
unescape::<DoubleQuoteString>(raw)
} else {
unescape::<SingleQuoteString>(raw)
}
.unwrap();
self.push_literal(&content);
}
r#"'"# | r#"""# | r#"{"# | r#"}"# => {}
_ => {
let placeholder = string_part.utf8_text(code.as_bytes()).unwrap();
self.push_placeholder(placeholder);
}
}
}
self.extend(string_parts(node, code, &mut cursor).into_iter());
}
"binary_expression" => {
let start = node.named_child(0).unwrap().range().end_byte;
@ -97,6 +97,36 @@ impl MessageBuilder {
}
}
}
"member_call_expression" => {
match node
.child_by_field_name("name")
.and_then(|name| name.utf8_text(code.as_bytes()).ok())
{
Some("t") => {
let arguments =
node.child_by_field_name("arguments").expect("no arguments");
let mut arguments = arguments.children(&mut cursor).skip(1); // opening bracket
let mut cursor = node.walk();
let fmt = string_parts(arguments.next().unwrap().child(0).unwrap(), code, &mut cursor);
let mut arguments = arguments.filter_map(|arg| {
(arg.grammar_name() != ",")
.then(|| arg.utf8_text(code.as_bytes()).unwrap())
});
for part in fmt {
match part {
MessagePart::Literal(lit) => self.push_printf(&lit, &mut arguments),
MessagePart::PlaceHolder(placeholder) => {
self.push_placeholder(&placeholder)
}
}
}
}
_ => {
let placeholder = node.utf8_text(code.as_bytes()).unwrap();
self.push_placeholder(placeholder);
}
}
}
_ => {
let placeholder = node.utf8_text(code.as_bytes()).unwrap();
self.push_placeholder(placeholder);
@ -115,3 +145,54 @@ impl From<MessageBuilder> for Vec<MessagePart> {
value.parts
}
}
/// Breaks the children of a string node into literal and placeholder parts.
///
/// The first child is consumed to decide the quoting style: the string counts as
/// double-quoted when that child's grammar name is `"`. The remaining children map as
/// follows:
///
/// * `string_content` becomes a literal holding the source text unchanged;
/// * `escape_sequence` becomes a literal holding the unescaped text, decoded with the
///   double- or single-quote rules according to the quoting style;
/// * quote and brace tokens (`'`, `"`, `{`, `}`) are dropped;
/// * anything else becomes a placeholder holding its source text.
///
/// # Panics
///
/// Panics if a child's text cannot be read as UTF-8 from `code`, or if an escape sequence
/// fails to unescape.
fn string_parts<'cursor, 'node: 'cursor>(
    node: Node<'node>,
    code: &str,
    cursor: &mut TreeCursor<'cursor>,
) -> Vec<MessagePart> {
    let source = code.as_bytes();
    let mut children = node.children(cursor);
    // The opening token is taken off the iterator here, so the loop below starts at the
    // second child.
    let double_quoted = matches!(
        children.next(),
        Some(opening) if opening.grammar_name() == r#"""#
    );

    let mut parts = Vec::new();
    for child in children {
        match child.grammar_name() {
            "string_content" => {
                let text = child.utf8_text(source).unwrap();
                parts.push(MessagePart::Literal(text.into()));
            }
            "escape_sequence" => {
                let raw = child.utf8_text(source).unwrap();
                let unescaped = if double_quoted {
                    unescape::<DoubleQuoteString>(raw)
                } else {
                    unescape::<SingleQuoteString>(raw)
                };
                parts.push(MessagePart::Literal(unescaped.unwrap().into()));
            }
            // Delimiters carry no message content.
            r#"'"# | r#"""# | r#"{"# | r#"}"# => {}
            _ => {
                let text = child.utf8_text(source).unwrap();
                parts.push(MessagePart::PlaceHolder(text.into()));
            }
        }
    }
    parts
}
/// A single `%s` conversion is replaced by the supplied argument, with the surrounding
/// text kept as literals on either side.
#[test]
fn test_printf() {
    let expected = vec![
        MessagePart::Literal("test ".into()),
        MessagePart::PlaceHolder("$name".into()),
        MessagePart::Literal(" foo".into()),
    ];

    let mut args = std::iter::once("$name");
    let mut builder = MessageBuilder::with_capacity(4);
    builder.push_printf("test %s foo", &mut args);

    assert_eq!(expected, builder.parts);
}