mirror of
https://codeberg.org/icewind/galton.git
synced 2026-06-03 18:34:08 +02:00
add xpath extractor
This commit is contained in:
parent
e20f0d7661
commit
b423323473
8 changed files with 1198 additions and 57 deletions
|
|
@ -1,8 +1,11 @@
|
|||
mod xpath;
|
||||
|
||||
use crate::file::FileInfo;
|
||||
use std::borrow::{Borrow, Cow};
|
||||
use std::collections::HashMap;
|
||||
use std::error::Error;
|
||||
use std::hash::Hash;
|
||||
pub use xpath::XPathExtractor;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct MultiExtractor<'a> {
|
||||
|
|
|
|||
132
src/extractor/xpath.rs
Normal file
132
src/extractor/xpath.rs
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
use crate::extractor::Extractor;
|
||||
use crate::file::{FileError, FileInfo};
|
||||
use std::borrow::Cow;
|
||||
use std::error::Error;
|
||||
use std::path::PathBuf;
|
||||
use thiserror::Error;
|
||||
use xrust::parser::xml::parse as xmlparse;
|
||||
use xrust::parser::xpath::parse;
|
||||
use xrust::transform::context::{ContextBuilder, StaticContextBuilder};
|
||||
use xrust::trees::smite::RNode;
|
||||
use xrust::{Error as XPathParseError, Item, Node, SequenceTrait};
|
||||
|
||||
pub struct XPathExtractor<'a> {
|
||||
file: &'a FileInfo,
|
||||
}
|
||||
|
||||
impl<'a> XPathExtractor<'a> {
|
||||
pub fn new(file: &'a FileInfo) -> XPathExtractor<'a> {
|
||||
XPathExtractor { file }
|
||||
}
|
||||
}
|
||||
|
||||
impl Extractor for XPathExtractor<'_> {
|
||||
fn extract<'this>(&'this self, field: &str) -> Option<Result<Cow<'this, str>, Box<dyn Error>>> {
|
||||
let query = field
|
||||
.strip_prefix("xpath('")
|
||||
.and_then(|query| query.strip_suffix("')"))?;
|
||||
let transform = match parse::<RNode>(query, None) {
|
||||
Ok(transform) => transform,
|
||||
Err(error) => {
|
||||
return Some(Err(XPathError::Parse {
|
||||
query: query.into(),
|
||||
error,
|
||||
}
|
||||
.into()));
|
||||
}
|
||||
};
|
||||
|
||||
let content = match self.file.str_content() {
|
||||
Ok(content) => content,
|
||||
Err(error) => return Some(Err(XPathError::ReadFile(error).into())),
|
||||
};
|
||||
let xml = match xmlparse(RNode::new_document(), content, None) {
|
||||
Ok(xml) => xml,
|
||||
Err(error) => {
|
||||
return Some(Err(XPathError::ParseXml {
|
||||
error,
|
||||
path: self.file.path.clone().into(),
|
||||
}
|
||||
.into()));
|
||||
}
|
||||
};
|
||||
|
||||
let mut static_context = StaticContextBuilder::new()
|
||||
.message(|_| Ok(()))
|
||||
.fetcher(|_| Ok(String::new()))
|
||||
.parser(|_| unreachable!())
|
||||
.build();
|
||||
let context = ContextBuilder::new().context(vec![Item::Node(xml)]).build();
|
||||
let sequence = match context.dispatch(&mut static_context, &transform) {
|
||||
Ok(res) => res,
|
||||
Err(error) => {
|
||||
return Some(Err(XPathError::MatchError {
|
||||
error,
|
||||
query: query.into(),
|
||||
}
|
||||
.into()));
|
||||
}
|
||||
};
|
||||
|
||||
Some(Ok(sequence.to_xml().into()))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
enum XPathError {
|
||||
#[error("Failed to parse xpath '{query}': {error:#}")]
|
||||
Parse {
|
||||
error: XPathParseError,
|
||||
query: String,
|
||||
},
|
||||
#[error(transparent)]
|
||||
ReadFile(FileError),
|
||||
#[error("Failed to parse xml '{}': {error:#}", path.display())]
|
||||
ParseXml {
|
||||
error: XPathParseError,
|
||||
path: PathBuf,
|
||||
},
|
||||
#[error("Failed to match xpath '{query}': {error:#}")]
|
||||
MatchError {
|
||||
error: XPathParseError,
|
||||
query: String,
|
||||
},
|
||||
}
|
||||
|
||||
/// Checks that a text node can be extracted from an in-memory XML file
/// whose content cell is pre-filled, so nothing is read from disk.
#[test]
fn test_xpath() {
    use std::cell::OnceCell;

    let document = r#"<?xml version="1.0" encoding="UTF-8"?>
<Run version="1.7.0">
<GameIcon></GameIcon>
<GameName>Hollow Knight: Silksong</GameName>
<CategoryName>Any%</CategoryName>
<Metadata>
<Run id=""/>
<Platform usesEmulator="False"/>
<Variables>
<Variable name="Any% Subcategory">No Major Glitches</Variable>
</Variables>
</Metadata>
<Offset>00:00:00</Offset>
</Run>"#;

    let file = FileInfo {
        path: "/tmp/test.lss".into(),
        url: None,
        referrer: None,
        sha256: [0; 32],
        mtime: 1234,
        size: 100,
        content: OnceCell::from(document.as_bytes().to_vec()),
    };

    let extractor = XPathExtractor::new(&file);
    let game_name = extractor
        .extract("xpath('//GameName/text()')")
        .unwrap()
        .unwrap();

    assert_eq!("Hollow Knight: Silksong", game_name);
}
|
||||
39
src/file.rs
39
src/file.rs
|
|
@ -1,9 +1,11 @@
|
|||
use hex::FromHex;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::fs::File;
|
||||
use std::cell::OnceCell;
|
||||
use std::fs::{read, File};
|
||||
use std::io::Read;
|
||||
use std::os::unix::fs::MetadataExt;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::Utf8Error;
|
||||
use std::time::UNIX_EPOCH;
|
||||
use thiserror::Error;
|
||||
|
||||
|
|
@ -15,6 +17,7 @@ pub struct FileInfo {
|
|||
#[allow(dead_code)]
|
||||
pub mtime: u64,
|
||||
pub size: u64,
|
||||
pub content: OnceCell<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl FileInfo {
|
||||
|
|
@ -46,6 +49,7 @@ impl FileInfo {
|
|||
sha256: [0; 32],
|
||||
mtime,
|
||||
size: stat.size(),
|
||||
content: OnceCell::new(),
|
||||
};
|
||||
|
||||
let attributes = xattr::list(path).unwrap_or_default();
|
||||
|
|
@ -119,6 +123,30 @@ impl FileInfo {
|
|||
error,
|
||||
})?)
|
||||
}
|
||||
|
||||
fn read_content(&self) -> Result<Vec<u8>, FileError> {
|
||||
let path = Path::new(&self.path);
|
||||
read(path).map_err(|error| FileError::Read {
|
||||
path: path.into(),
|
||||
error,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the raw file content, reading it on first use.
///
/// The bytes are stored in the `content` cell, so later calls return the
/// cached data. A failed read is not cached and is returned to the caller.
pub fn content(&self) -> Result<&[u8], FileError> {
    let bytes = match self.content.get() {
        Some(cached) => cached,
        None => {
            // Read before touching the cell so an error leaves it empty.
            let loaded = self.read_content()?;
            self.content.get_or_init(|| loaded)
        }
    };
    Ok(bytes.as_slice())
}
|
||||
|
||||
pub fn str_content(&self) -> Result<&str, FileError> {
|
||||
let raw = self.content()?;
|
||||
std::str::from_utf8(raw).map_err(|error| FileError::Utf8 {
|
||||
error,
|
||||
path: self.path.clone().into(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn load_or_calculate_hash(path: impl AsRef<Path>) -> Result<[u8; 32], std::io::Error> {
|
||||
|
|
@ -163,9 +191,16 @@ pub enum FileError {
|
|||
path: PathBuf,
|
||||
error: std::io::Error,
|
||||
},
|
||||
#[error("Failed to store hash for: {}", path.display())]
|
||||
#[error("Failed to store hash for {}: {error:#}", path.display())]
|
||||
StoreHash {
|
||||
path: PathBuf,
|
||||
error: std::io::Error,
|
||||
},
|
||||
#[error("Failed to read {}: {error:#}", path.display())]
|
||||
Read {
|
||||
path: PathBuf,
|
||||
error: std::io::Error,
|
||||
},
|
||||
#[error("File {} is not valid utf8: {error:#}", path.display())]
|
||||
Utf8 { error: Utf8Error, path: PathBuf },
|
||||
}
|
||||
|
|
|
|||
|
|
@ -80,6 +80,8 @@ impl Extractor for RegexMatchExtractor {
|
|||
|
||||
#[test]
|
||||
fn test_file_meta_matcher() {
|
||||
use std::cell::OnceCell;
|
||||
|
||||
let txt_file = FileInfo {
|
||||
path: "/tmp/test.txt".into(),
|
||||
url: Some("https://example.com/test.txt".into()),
|
||||
|
|
@ -87,6 +89,7 @@ fn test_file_meta_matcher() {
|
|||
sha256: [0; 32],
|
||||
mtime: 1234,
|
||||
size: 100,
|
||||
content: OnceCell::new(),
|
||||
};
|
||||
let png_file = FileInfo {
|
||||
path: "/tmp/test.png".into(),
|
||||
|
|
@ -95,6 +98,7 @@ fn test_file_meta_matcher() {
|
|||
sha256: [0; 32],
|
||||
mtime: 1234,
|
||||
size: 100,
|
||||
content: OnceCell::new(),
|
||||
};
|
||||
|
||||
let txt_matcher = parse("name", r#"(?<txt_name>.+)\.txt"#).unwrap().unwrap();
|
||||
|
|
|
|||
12
src/rule.rs
12
src/rule.rs
|
|
@ -1,4 +1,4 @@
|
|||
use crate::extractor::{Extractor, FileInfoExtractor, MultiExtractor};
|
||||
use crate::extractor::{Extractor, FileInfoExtractor, MultiExtractor, XPathExtractor};
|
||||
use crate::file::FileInfo;
|
||||
use crate::matchers::Matcher;
|
||||
use regex::Regex;
|
||||
|
|
@ -24,8 +24,11 @@ pub struct RuleResult {
|
|||
impl Rule {
|
||||
pub fn matches(&self, file: &FileInfo) -> Option<Result<RuleResult, RuleMatchError>> {
|
||||
let file_extractor = FileInfoExtractor::new(file);
|
||||
let mut extractors = MultiExtractor::with_capacity(self.matchers.len() + 1);
|
||||
let xpath_extractor = XPathExtractor::new(file);
|
||||
|
||||
let mut extractors = MultiExtractor::with_capacity(self.matchers.len() + 2);
|
||||
extractors.push(&file_extractor);
|
||||
extractors.push(&xpath_extractor);
|
||||
|
||||
for matcher in &self.matchers {
|
||||
match matcher.matches(file)? {
|
||||
|
|
@ -61,7 +64,7 @@ static SUBST_REGEX: OnceLock<Regex> = OnceLock::new();
|
|||
|
||||
fn apply_extractors<E: Extractor>(input: &str, extractor: &E) -> Result<String, RuleMatchError> {
|
||||
let subst_regex =
|
||||
SUBST_REGEX.get_or_init(|| Regex::new(r#"\$(\{([^})]+)}|([a-zA-Z0-9]+))"#).unwrap());
|
||||
SUBST_REGEX.get_or_init(|| Regex::new(r#"\$(\{([^}]+)}|([a-zA-Z0-9]+))"#).unwrap());
|
||||
|
||||
// copied from `Regex::replace_all` adjusted to support returning errors
|
||||
let mut it = subst_regex.captures_iter(input).enumerate().peekable();
|
||||
|
|
@ -121,6 +124,8 @@ fn test_apply_extractors() {
|
|||
#[test]
|
||||
fn test_rule() {
|
||||
use crate::matchers::get_matcher;
|
||||
use std::cell::OnceCell;
|
||||
|
||||
let rule = Rule {
|
||||
matchers: vec![get_matcher("name", r#"\.txt"#).unwrap().unwrap()],
|
||||
target: Some("/target/dir".into()),
|
||||
|
|
@ -134,6 +139,7 @@ fn test_rule() {
|
|||
sha256: [0; 32],
|
||||
mtime: 1234,
|
||||
size: 100,
|
||||
content: OnceCell::new(),
|
||||
};
|
||||
|
||||
let result = rule.matches(&txt_file).unwrap().unwrap();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue