mirror of
https://codeberg.org/icewind/galton.git
synced 2026-06-03 18:34:08 +02:00
prepare for more flexible matchers/extractors
This commit is contained in:
parent
eb4c13a43c
commit
e20f0d7661
9 changed files with 401 additions and 115 deletions
7
Cargo.lock
generated
7
Cargo.lock
generated
|
|
@ -235,6 +235,7 @@ dependencies = [
|
||||||
"hex",
|
"hex",
|
||||||
"home",
|
"home",
|
||||||
"main_error",
|
"main_error",
|
||||||
|
"maplit",
|
||||||
"notify-debouncer-full",
|
"notify-debouncer-full",
|
||||||
"regex",
|
"regex",
|
||||||
"serde",
|
"serde",
|
||||||
|
|
@ -368,6 +369,12 @@ version = "0.1.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "155db5e86c6e45ee456bf32fad5a290ee1f7151c2faca27ea27097568da67d1a"
|
checksum = "155db5e86c6e45ee456bf32fad5a290ee1f7151c2faca27ea27097568da67d1a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "maplit"
|
||||||
|
version = "1.0.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "memchr"
|
name = "memchr"
|
||||||
version = "2.7.6"
|
version = "2.7.6"
|
||||||
|
|
|
||||||
|
|
@ -19,4 +19,7 @@ tracing-subscriber = "0.3.20"
|
||||||
notify-debouncer-full = "0.6.0"
|
notify-debouncer-full = "0.6.0"
|
||||||
ctrlc = "3.5.0"
|
ctrlc = "3.5.0"
|
||||||
sha2 = "0.11.0-rc.2"
|
sha2 = "0.11.0-rc.2"
|
||||||
hex = "0.4.3"
|
hex = "0.4.3"
|
||||||
|
|
||||||
|
[dev-dependencies]
|
||||||
|
maplit = "1.0.2"
|
||||||
|
|
@ -1,7 +1,9 @@
|
||||||
|
use crate::matchers::get_matcher;
|
||||||
use crate::rule::Rule;
|
use crate::rule::Rule;
|
||||||
use home::home_dir;
|
use home::home_dir;
|
||||||
use regex::Regex;
|
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::error::Error;
|
||||||
use std::fs::read_to_string;
|
use std::fs::read_to_string;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
@ -55,39 +57,40 @@ pub fn normalize_path<P: Into<String> + AsRef<str>>(path: P) -> String {
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
pub(crate) struct RuleConfig {
|
pub(crate) struct RuleConfig {
|
||||||
name: Option<String>,
|
|
||||||
referrer: Option<String>,
|
|
||||||
url: Option<String>,
|
|
||||||
#[serde(rename = "move")]
|
#[serde(rename = "move")]
|
||||||
target: Option<String>,
|
pub target: Option<String>,
|
||||||
rename: Option<String>,
|
pub rename: Option<String>,
|
||||||
|
#[serde(flatten)]
|
||||||
|
pub matchers: HashMap<String, String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TryFrom<RuleConfig> for Rule {
|
impl TryFrom<RuleConfig> for Rule {
|
||||||
type Error = RuleError;
|
type Error = RuleError;
|
||||||
|
|
||||||
fn try_from(value: RuleConfig) -> Result<Self, Self::Error> {
|
fn try_from(value: RuleConfig) -> Result<Self, Self::Error> {
|
||||||
if value.name.is_none() && value.referrer.is_none() && value.url.is_none() {
|
if value.matchers.is_empty() {
|
||||||
return Err(RuleError::NoMatches);
|
return Err(RuleError::NoMatches);
|
||||||
}
|
}
|
||||||
if value.rename.is_none() && value.target.is_none() {
|
if value.rename.is_none() && value.target.is_none() {
|
||||||
return Err(RuleError::NoAction);
|
return Err(RuleError::NoAction);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_rule(val: Option<String>) -> Result<Option<Regex>, RuleError> {
|
let matchers = value
|
||||||
let Some(val) = val else {
|
.matchers
|
||||||
return Ok(None);
|
.into_iter()
|
||||||
};
|
.map(|(name, value)| {
|
||||||
|
let res = get_matcher(&name, &value)
|
||||||
Ok(Some(
|
.ok_or_else(|| RuleError::UnknownRule(name.clone()))?;
|
||||||
Regex::new(&val).map_err(|error| RuleError::Regex { input: val, error })?,
|
res.map_err(|error| RuleError::InvalidRule {
|
||||||
))
|
field: name,
|
||||||
}
|
value,
|
||||||
|
error,
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Result<Vec<_>, _>>()?;
|
||||||
|
|
||||||
Ok(Rule {
|
Ok(Rule {
|
||||||
name: parse_rule(value.name)?,
|
matchers,
|
||||||
referrer: parse_rule(value.referrer)?,
|
|
||||||
url: parse_rule(value.url)?,
|
|
||||||
target: value.target.map(normalize_path),
|
target: value.target.map(normalize_path),
|
||||||
rename: value.rename,
|
rename: value.rename,
|
||||||
})
|
})
|
||||||
|
|
@ -114,6 +117,12 @@ pub enum RuleError {
|
||||||
NoMatches,
|
NoMatches,
|
||||||
#[error("at least one action rule needs to be defined")]
|
#[error("at least one action rule needs to be defined")]
|
||||||
NoAction,
|
NoAction,
|
||||||
#[error("invalid regex {input}: {error:#}")]
|
#[error("Unknown match rule '{0}'")]
|
||||||
Regex { input: String, error: regex::Error },
|
UnknownRule(String),
|
||||||
|
#[error("Invalid match rule {field} = '{value}': {error:#}")]
|
||||||
|
InvalidRule {
|
||||||
|
field: String,
|
||||||
|
value: String,
|
||||||
|
error: Box<dyn Error>,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
|
||||||
77
src/extractor/mod.rs
Normal file
77
src/extractor/mod.rs
Normal file
|
|
@ -0,0 +1,77 @@
|
||||||
|
use crate::file::FileInfo;
|
||||||
|
use std::borrow::{Borrow, Cow};
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::error::Error;
|
||||||
|
use std::hash::Hash;
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct MultiExtractor<'a> {
|
||||||
|
extractors: Vec<DynCow<'a>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
enum DynCow<'a> {
|
||||||
|
Ref(&'a dyn Extractor),
|
||||||
|
Box(Box<dyn Extractor>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> AsRef<dyn Extractor + 'a> for DynCow<'a> {
|
||||||
|
fn as_ref(&self) -> &(dyn Extractor + 'a) {
|
||||||
|
match self {
|
||||||
|
DynCow::Ref(r) => *r,
|
||||||
|
DynCow::Box(b) => b.as_ref(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> MultiExtractor<'a> {
|
||||||
|
pub fn with_capacity(cap: usize) -> Self {
|
||||||
|
MultiExtractor {
|
||||||
|
extractors: Vec::with_capacity(cap),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn push(&mut self, extractor: &'a dyn Extractor) {
|
||||||
|
self.extractors.push(DynCow::Ref(extractor))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn push_box(&mut self, extractor: Box<dyn Extractor>) {
|
||||||
|
self.extractors.push(DynCow::Box(extractor))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Extractor for MultiExtractor<'_> {
|
||||||
|
fn extract<'this>(&'this self, field: &str) -> Option<Result<Cow<'this, str>, Box<dyn Error>>> {
|
||||||
|
self.extractors
|
||||||
|
.iter()
|
||||||
|
.find_map(|ex| ex.as_ref().extract(field))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A source of named string values, consulted when placeholders are substituted.
pub trait Extractor {
    /// Looks up the value stored under `field`.
    ///
    /// * `None` – this extractor does not provide `field`.
    /// * `Some(Ok(value))` – the value, borrowed from `self` or freshly allocated.
    /// * `Some(Err(_))` – the field is provided but producing its value failed.
    fn extract<'this>(&'this self, field: &str) -> Option<Result<Cow<'this, str>, Box<dyn Error>>>;
}
|
||||||
|
|
||||||
|
pub struct FileInfoExtractor<'a> {
|
||||||
|
file: &'a FileInfo,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> FileInfoExtractor<'a> {
|
||||||
|
pub fn new(file: &'a FileInfo) -> Self {
|
||||||
|
FileInfoExtractor { file }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Extractor for FileInfoExtractor<'_> {
|
||||||
|
fn extract<'this>(&'this self, field: &str) -> Option<Result<Cow<'this, str>, Box<dyn Error>>> {
|
||||||
|
Some(Ok(match field {
|
||||||
|
"mtime" => Cow::Owned(self.file.mtime.to_string()),
|
||||||
|
_ => return None,
|
||||||
|
}))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<T: AsRef<str> + Borrow<str> + Eq + Hash + Clone + 'static> Extractor for HashMap<T, T> {
|
||||||
|
fn extract<'this>(&'this self, field: &str) -> Option<Result<Cow<'this, str>, Box<dyn Error>>> {
|
||||||
|
self.get(field).map(T::as_ref).map(Cow::Borrowed).map(Ok)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -14,7 +14,6 @@ pub struct FileInfo {
|
||||||
pub sha256: [u8; 32],
|
pub sha256: [u8; 32],
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
pub mtime: u64,
|
pub mtime: u64,
|
||||||
pub mtime_str: String,
|
|
||||||
pub size: u64,
|
pub size: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -46,7 +45,6 @@ impl FileInfo {
|
||||||
referrer: None,
|
referrer: None,
|
||||||
sha256: [0; 32],
|
sha256: [0; 32],
|
||||||
mtime,
|
mtime,
|
||||||
mtime_str: mtime.to_string(),
|
|
||||||
size: stat.size(),
|
size: stat.size(),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
19
src/main.rs
19
src/main.rs
|
|
@ -1,6 +1,6 @@
|
||||||
use crate::config::{normalize_path, GaltonConfig};
|
use crate::config::{normalize_path, GaltonConfig};
|
||||||
use crate::file::FileInfo;
|
use crate::file::FileInfo;
|
||||||
use crate::rule::{Rule, RuleMatch};
|
use crate::rule::{Rule, RuleResult};
|
||||||
use clap::builder::styling::{AnsiColor, Effects};
|
use clap::builder::styling::{AnsiColor, Effects};
|
||||||
use clap::builder::Styles;
|
use clap::builder::Styles;
|
||||||
use clap::{Parser, Subcommand};
|
use clap::{Parser, Subcommand};
|
||||||
|
|
@ -18,7 +18,9 @@ use std::time::Duration;
|
||||||
use tracing::{debug, error, info, instrument};
|
use tracing::{debug, error, info, instrument};
|
||||||
|
|
||||||
mod config;
|
mod config;
|
||||||
|
mod extractor;
|
||||||
mod file;
|
mod file;
|
||||||
|
mod matchers;
|
||||||
mod rule;
|
mod rule;
|
||||||
|
|
||||||
fn styles() -> Styles {
|
fn styles() -> Styles {
|
||||||
|
|
@ -166,7 +168,7 @@ fn is_part(path: &Path) -> bool {
|
||||||
|
|
||||||
fn maybe_link(source: Option<&Path>, target: Option<&str>) {
|
fn maybe_link(source: Option<&Path>, target: Option<&str>) {
|
||||||
if let (Some(source), Some(target)) = (source, target) {
|
if let (Some(source), Some(target)) = (source, target) {
|
||||||
if Path::new(target).exists() {
|
if Path::new(target).is_symlink() {
|
||||||
if let Err(error) = remove_file(target) {
|
if let Err(error) = remove_file(target) {
|
||||||
error!(%error, "failed to remove link target");
|
error!(%error, "failed to remove link target");
|
||||||
return;
|
return;
|
||||||
|
|
@ -183,11 +185,18 @@ fn maybe_link(source: Option<&Path>, target: Option<&str>) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn match_file(file: &FileInfo, rules: &[Rule]) -> Option<RuleMatch> {
|
fn match_file(file: &FileInfo, rules: &[Rule]) -> Option<RuleResult> {
|
||||||
for rule in rules {
|
for rule in rules {
|
||||||
if let Some(result) = rule.matches(file) {
|
if let Some(result) = rule.matches(file) {
|
||||||
debug!(?rule, ?result, "found matching rule");
|
match result {
|
||||||
return Some(result);
|
Ok(result) => {
|
||||||
|
debug!(?rule, ?result, "found matching rule");
|
||||||
|
return Some(result);
|
||||||
|
}
|
||||||
|
Err(error) => {
|
||||||
|
error!(?rule, %error, "error matching rule");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
|
|
|
||||||
109
src/matchers/filemeta.rs
Normal file
109
src/matchers/filemeta.rs
Normal file
|
|
@ -0,0 +1,109 @@
|
||||||
|
use crate::extractor::Extractor;
|
||||||
|
use crate::file::FileInfo;
|
||||||
|
use crate::matchers::{regex_matches, Matcher};
|
||||||
|
use regex::Regex;
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::error::Error;
|
||||||
|
|
||||||
|
/// The file metadata fields a rule can be matched against.
#[derive(Debug)]
enum FileMetaField {
    Name,
    Referrer,
    Url,
}

impl FileMetaField {
    /// Parses a config key into a field.
    ///
    /// Only the exact strings `name`, `referrer` and `url` are recognised;
    /// anything else yields `None`.
    fn new(s: &str) -> Option<FileMetaField> {
        let known = [
            FileMetaField::Name,
            FileMetaField::Referrer,
            FileMetaField::Url,
        ];
        // Reuse `as_str` so the parse table and the name table cannot drift apart.
        IntoIterator::into_iter(known).find(|field| field.as_str() == s)
    }

    /// The config key this field is written as.
    fn as_str(&self) -> &'static str {
        match *self {
            FileMetaField::Name => "name",
            FileMetaField::Referrer => "referrer",
            FileMetaField::Url => "url",
        }
    }
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct FileMetaMatcher {
|
||||||
|
field: FileMetaField,
|
||||||
|
regex: Regex,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse(name: &str, value: &str) -> Option<Result<FileMetaMatcher, regex::Error>> {
|
||||||
|
let field = FileMetaField::new(name)?;
|
||||||
|
Some(Regex::new(value).map(|regex| FileMetaMatcher { field, regex }))
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FileMetaMatcher {
|
||||||
|
fn value<'a>(&self, file: &'a FileInfo) -> Option<&'a str> {
|
||||||
|
match self.field {
|
||||||
|
FileMetaField::Name => Some(file.name()),
|
||||||
|
FileMetaField::Referrer => file.referrer.as_deref(),
|
||||||
|
FileMetaField::Url => file.url.as_deref(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Matcher for FileMetaMatcher {
|
||||||
|
fn name(&self) -> &str {
|
||||||
|
self.field.as_str()
|
||||||
|
}
|
||||||
|
fn matches(&self, file: &FileInfo) -> Option<Result<Box<dyn Extractor>, Box<dyn Error>>> {
|
||||||
|
let value = self.value(file)?;
|
||||||
|
let matches = regex_matches(&self.regex, value)?;
|
||||||
|
let extractor = RegexMatchExtractor { matches };
|
||||||
|
Some(Ok(Box::new(extractor)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct RegexMatchExtractor {
|
||||||
|
matches: Vec<(String, String)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Extractor for RegexMatchExtractor {
|
||||||
|
fn extract(&self, field: &str) -> Option<Result<Cow<str>, Box<dyn Error>>> {
|
||||||
|
let value = self
|
||||||
|
.matches
|
||||||
|
.iter()
|
||||||
|
.find_map(|(name, value)| (name.as_str() == field).then_some(value.as_str()))?;
|
||||||
|
Some(Ok(Cow::Borrowed(value)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A `name` matcher exposes its named capture group and rejects non-matching
/// files; a `referrer` matcher only accepts files whose referrer matches.
#[test]
fn test_file_meta_matcher() {
    let png_file = FileInfo {
        path: "/tmp/test.png".into(),
        url: Some("https://example.com/test.png".into()),
        referrer: Some("https://example.com/images".into()),
        sha256: [0; 32],
        mtime: 1234,
        size: 100,
    };
    let txt_file = FileInfo {
        path: "/tmp/test.txt".into(),
        url: Some("https://example.com/test.txt".into()),
        referrer: Some("https://example.com/downloads".into()),
        sha256: [0; 32],
        mtime: 1234,
        size: 100,
    };

    // Matching on the file name with a named capture group.
    let by_name = parse("name", r#"(?<txt_name>.+)\.txt"#).unwrap().unwrap();
    assert!(by_name.matches(&png_file).is_none());
    let captured = by_name.matches(&txt_file).unwrap().unwrap();
    assert_eq!(captured.extract("txt_name").unwrap().unwrap(), "test");

    // Matching on the referrer.
    let by_referrer = parse("referrer", r#"downloads"#).unwrap().unwrap();
    assert!(by_referrer.matches(&png_file).is_none());
    assert!(by_referrer.matches(&txt_file).is_some());
}
|
||||||
49
src/matchers/mod.rs
Normal file
49
src/matchers/mod.rs
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
use crate::extractor::Extractor;
|
||||||
|
use crate::file::FileInfo;
|
||||||
|
use regex::Regex;
|
||||||
|
use std::error::Error;
|
||||||
|
use std::fmt::Debug;
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
|
mod filemeta;
|
||||||
|
|
||||||
|
#[derive(Debug, Error)]
|
||||||
|
#[error("Malformed match rule '{input}': {error}")]
|
||||||
|
pub struct MatcherParseError {
|
||||||
|
input: String,
|
||||||
|
error: Box<dyn Error>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Type-erases both sides of a matcher construction result: the matcher becomes
/// a `Box<dyn Matcher>` and the error a `Box<dyn Error>`.
fn map_result<T: Matcher + 'static, E: Error + 'static>(
    res: Result<T, E>,
) -> Result<Box<dyn Matcher>, Box<dyn Error>> {
    match res {
        Ok(matcher) => Ok(Box::new(matcher)),
        Err(err) => Err(Box::new(err)),
    }
}
|
||||||
|
|
||||||
|
pub fn get_matcher(name: &str, value: &str) -> Option<Result<Box<dyn Matcher>, Box<dyn Error>>> {
|
||||||
|
if let Some(res) = filemeta::parse(name, value) {
|
||||||
|
return Some(map_result(res));
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait Matcher: Debug {
|
||||||
|
fn name(&self) -> &str;
|
||||||
|
|
||||||
|
fn matches(&self, file: &FileInfo) -> Option<Result<Box<dyn Extractor>, Box<dyn Error>>>;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn regex_matches(regex: &Regex, string: &str) -> Option<Vec<(String, String)>> {
|
||||||
|
let captures = regex.captures(string)?;
|
||||||
|
Some(
|
||||||
|
captures
|
||||||
|
.iter()
|
||||||
|
.zip(regex.capture_names())
|
||||||
|
.skip(1)
|
||||||
|
.filter_map(|(m, name)| m.zip(name))
|
||||||
|
.map(|(m, name)| (name.into(), m.as_str().into()))
|
||||||
|
.collect(),
|
||||||
|
)
|
||||||
|
}
|
||||||
213
src/rule.rs
213
src/rule.rs
|
|
@ -1,117 +1,142 @@
|
||||||
|
use crate::extractor::{Extractor, FileInfoExtractor, MultiExtractor};
|
||||||
use crate::file::FileInfo;
|
use crate::file::FileInfo;
|
||||||
|
use crate::matchers::Matcher;
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use serde::Deserialize;
|
use serde::Deserialize;
|
||||||
use std::borrow::Cow;
|
use std::error::Error;
|
||||||
use std::collections::HashMap;
|
use std::sync::OnceLock;
|
||||||
|
use thiserror::Error;
|
||||||
|
|
||||||
#[derive(Debug, Deserialize)]
|
#[derive(Debug, Deserialize)]
|
||||||
#[serde(try_from = "crate::config::RuleConfig")]
|
#[serde(try_from = "crate::config::RuleConfig")]
|
||||||
pub struct Rule {
|
pub struct Rule {
|
||||||
pub name: Option<Regex>,
|
pub matchers: Vec<Box<dyn Matcher>>,
|
||||||
pub referrer: Option<Regex>,
|
|
||||||
pub url: Option<Regex>,
|
|
||||||
pub target: Option<String>,
|
pub target: Option<String>,
|
||||||
pub rename: Option<String>,
|
pub rename: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct RuleMatch {
|
pub struct RuleResult {
|
||||||
pub target: Option<String>,
|
pub target: Option<String>,
|
||||||
pub rename: Option<String>,
|
pub rename: Option<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Hash, PartialEq, Eq, Debug)]
|
|
||||||
enum CaptureName<'a> {
|
|
||||||
Named(&'a str),
|
|
||||||
Unnamed(usize),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> CaptureName<'a> {
|
|
||||||
pub fn to_str(&self) -> Cow<'a, str> {
|
|
||||||
match self {
|
|
||||||
CaptureName::Named(s) => Cow::Borrowed(s),
|
|
||||||
CaptureName::Unnamed(1) => Cow::Borrowed("1"),
|
|
||||||
CaptureName::Unnamed(2) => Cow::Borrowed("2"),
|
|
||||||
CaptureName::Unnamed(3) => Cow::Borrowed("3"),
|
|
||||||
CaptureName::Unnamed(4) => Cow::Borrowed("4"),
|
|
||||||
CaptureName::Unnamed(5) => Cow::Borrowed("5"),
|
|
||||||
CaptureName::Unnamed(6) => Cow::Borrowed("6"),
|
|
||||||
CaptureName::Unnamed(7) => Cow::Borrowed("7"),
|
|
||||||
CaptureName::Unnamed(8) => Cow::Borrowed("8"),
|
|
||||||
CaptureName::Unnamed(9) => Cow::Borrowed("9"),
|
|
||||||
CaptureName::Unnamed(10) => Cow::Borrowed("10"),
|
|
||||||
CaptureName::Unnamed(i) => Cow::Owned(i.to_string()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Rule {
|
impl Rule {
|
||||||
pub fn matches(&self, file: &FileInfo) -> Option<RuleMatch> {
|
pub fn matches(&self, file: &FileInfo) -> Option<Result<RuleResult, RuleMatchError>> {
|
||||||
let mut captures: HashMap<CaptureName, &str> = HashMap::new();
|
let file_extractor = FileInfoExtractor::new(file);
|
||||||
captures.insert(CaptureName::Named("mtime"), &file.mtime_str);
|
let mut extractors = MultiExtractor::with_capacity(self.matchers.len() + 1);
|
||||||
|
extractors.push(&file_extractor);
|
||||||
|
|
||||||
if let Some(name) = &self.name {
|
for matcher in &self.matchers {
|
||||||
if !extract_matches(name, file.name(), &mut captures) {
|
match matcher.matches(file)? {
|
||||||
return None;
|
Ok(extractor) => extractors.push_box(extractor),
|
||||||
}
|
Err(error) => {
|
||||||
}
|
return Some(Err(RuleMatchError::Matcher {
|
||||||
|
field: matcher.name().into(),
|
||||||
if let Some(referrer) = &self.referrer {
|
error,
|
||||||
if !extract_matches(
|
}));
|
||||||
referrer,
|
|
||||||
file.referrer.as_deref().unwrap_or_default(),
|
|
||||||
&mut captures,
|
|
||||||
) {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(url) = &self.url {
|
|
||||||
if !extract_matches(url, file.url.as_deref().unwrap_or_default(), &mut captures) {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let apply = |input| apply_captures(input, &captures);
|
|
||||||
|
|
||||||
Some(RuleMatch {
|
|
||||||
target: self.target.as_deref().map(apply),
|
|
||||||
rename: self.rename.as_deref().map(apply),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn apply_captures(input: &str, captures: &HashMap<CaptureName, &str>) -> String {
|
|
||||||
let mut output = input.to_string();
|
|
||||||
for (name, value) in captures {
|
|
||||||
let name = name.to_str();
|
|
||||||
if output.contains(name.as_ref()) && output.contains('$') {
|
|
||||||
output = output.replace(&format!("${name}"), value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
output
|
|
||||||
}
|
|
||||||
|
|
||||||
fn extract_matches<'a, 'b>(
|
|
||||||
regex: &'a Regex,
|
|
||||||
string: &'b str,
|
|
||||||
output: &mut HashMap<CaptureName<'a>, &'b str>,
|
|
||||||
) -> bool {
|
|
||||||
match regex.captures(string) {
|
|
||||||
Some(caps) => {
|
|
||||||
for (i, (m, name)) in caps.iter().zip(regex.capture_names()).enumerate().skip(1) {
|
|
||||||
if let Some(m) = m {
|
|
||||||
let cap_name = match name {
|
|
||||||
Some(name) => CaptureName::Named(name),
|
|
||||||
None => CaptureName::Unnamed(i),
|
|
||||||
};
|
|
||||||
output.insert(cap_name, m.as_str());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None => {
|
|
||||||
return false;
|
let apply = |input: Option<&str>| {
|
||||||
}
|
let Some(input) = input else { return Ok(None) };
|
||||||
|
apply_extractors(input, &extractors).map(Some)
|
||||||
|
};
|
||||||
|
|
||||||
|
let target = match apply(self.target.as_deref()) {
|
||||||
|
Ok(target) => target,
|
||||||
|
Err(e) => return Some(Err(e)),
|
||||||
|
};
|
||||||
|
let rename = match apply(self.rename.as_deref()) {
|
||||||
|
Ok(target) => target,
|
||||||
|
Err(e) => return Some(Err(e)),
|
||||||
|
};
|
||||||
|
|
||||||
|
Some(Ok(RuleResult { target, rename }))
|
||||||
}
|
}
|
||||||
true
|
}
|
||||||
|
|
||||||
|
static SUBST_REGEX: OnceLock<Regex> = OnceLock::new();
|
||||||
|
|
||||||
|
fn apply_extractors<E: Extractor>(input: &str, extractor: &E) -> Result<String, RuleMatchError> {
|
||||||
|
let subst_regex =
|
||||||
|
SUBST_REGEX.get_or_init(|| Regex::new(r#"\$(\{([^})]+)}|([a-zA-Z0-9]+))"#).unwrap());
|
||||||
|
|
||||||
|
// copied from `Regex::replace_all` adjusted to support returning errors
|
||||||
|
let mut it = subst_regex.captures_iter(input).enumerate().peekable();
|
||||||
|
if it.peek().is_none() {
|
||||||
|
return Ok(input.into());
|
||||||
|
}
|
||||||
|
let mut new = String::with_capacity(input.len());
|
||||||
|
let mut last_match = 0;
|
||||||
|
for (_, cap) in it {
|
||||||
|
let m = &cap.get(0).unwrap();
|
||||||
|
new.push_str(&input[last_match..m.start()]);
|
||||||
|
|
||||||
|
let name: &str = cap.get(2).or_else(|| cap.get(3)).unwrap().as_str();
|
||||||
|
let extracted = extractor
|
||||||
|
.extract(name)
|
||||||
|
.ok_or_else(|| RuleMatchError::UnknownSubstitution { name: name.into() })?
|
||||||
|
.map_err(|error| RuleMatchError::Matcher {
|
||||||
|
field: name.into(),
|
||||||
|
error,
|
||||||
|
})?;
|
||||||
|
new.push_str(&extracted);
|
||||||
|
|
||||||
|
last_match = m.end();
|
||||||
|
}
|
||||||
|
new.push_str(&input[last_match..]);
|
||||||
|
Ok(new)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Error)]
|
||||||
|
pub enum RuleMatchError {
|
||||||
|
#[error("Error matching {field}: {error:#}")]
|
||||||
|
Matcher {
|
||||||
|
field: String,
|
||||||
|
error: Box<dyn Error>,
|
||||||
|
},
|
||||||
|
#[error("Unknown substitution {name}")]
|
||||||
|
UnknownSubstitution { name: String },
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Placeholder expansion against a plain map: the bare form, the braced form,
/// and an unknown name.
#[test]
fn test_apply_extractors() {
    use maplit::hashmap;

    let extractor = hashmap! {
        "foo" => "bar",
        "longer-key" => "value"
    };

    // Bare placeholder followed by the end of input.
    let bare = apply_extractors("test $foo", &extractor).unwrap();
    assert_eq!("test bar", bare);

    // Braces delimit the name from the text that follows.
    let braced = apply_extractors("${foo}bar", &extractor).unwrap();
    assert_eq!("barbar", braced);

    // Without braces the whole word is the name, and it is unknown.
    assert!(apply_extractors("$foobar", &extractor).is_err());
}
|
||||||
|
|
||||||
|
/// A rule with a single `name` matcher and a placeholder-free target matches a
/// `.txt` file and passes the target through unchanged.
#[test]
fn test_rule() {
    use crate::matchers::get_matcher;

    let txt_file = FileInfo {
        path: "/tmp/test.txt".into(),
        url: Some("https://example.com/test.txt".into()),
        referrer: Some("https://example.com/downloads".into()),
        sha256: [0; 32],
        mtime: 1234,
        size: 100,
    };

    let name_matcher = get_matcher("name", r#"\.txt"#).unwrap().unwrap();
    let rule = Rule {
        matchers: vec![name_matcher],
        target: Some("/target/dir".into()),
        rename: None,
    };

    let result = rule.matches(&txt_file).unwrap().unwrap();
    assert!(result.rename.is_none());
    assert_eq!(Some("/target/dir"), result.target.as_deref());
}
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue