mirror of
https://codeberg.org/icewind/galton.git
synced 2026-06-03 18:34:08 +02:00
add remove-duplicates option
This commit is contained in:
parent
1781b20f20
commit
6705debd2a
8 changed files with 308 additions and 30 deletions
|
|
@ -31,6 +31,8 @@ impl GaltonConfig {
|
|||
#[derive(Debug, Deserialize, Default)]
|
||||
pub struct WatchConfig {
|
||||
symlink: Option<String>,
|
||||
#[serde(rename = "remove-duplicates", default)]
|
||||
pub remove_duplicates: bool,
|
||||
}
|
||||
|
||||
impl WatchConfig {
|
||||
|
|
|
|||
85
src/file.rs
85
src/file.rs
|
|
@ -1,3 +1,8 @@
|
|||
use hex::FromHex;
|
||||
use sha2::{Digest, Sha256};
|
||||
use std::fs::File;
|
||||
use std::io::Read;
|
||||
use std::os::unix::fs::MetadataExt;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::UNIX_EPOCH;
|
||||
use thiserror::Error;
|
||||
|
|
@ -6,9 +11,11 @@ pub struct FileInfo {
|
|||
pub path: String,
|
||||
pub url: Option<String>,
|
||||
pub referrer: Option<String>,
|
||||
pub sha256: [u8; 32],
|
||||
#[allow(dead_code)]
|
||||
pub mtime: u64,
|
||||
pub mtime_str: String,
|
||||
pub size: u64,
|
||||
}
|
||||
|
||||
impl FileInfo {
|
||||
|
|
@ -37,8 +44,10 @@ impl FileInfo {
|
|||
path: path.into(),
|
||||
url: None,
|
||||
referrer: None,
|
||||
sha256: [0; 32],
|
||||
mtime,
|
||||
mtime_str: mtime.to_string(),
|
||||
size: stat.size(),
|
||||
};
|
||||
|
||||
let attributes = xattr::list(path).unwrap_or_default();
|
||||
|
|
@ -52,12 +61,33 @@ impl FileInfo {
|
|||
match attr {
|
||||
"user.xdg.origin.url" => file.url = Some(val),
|
||||
"user.xdg.referrer.url" => file.referrer = Some(val),
|
||||
"user.checksum.sha256" => {
|
||||
if let Ok(sha) = <[u8; 32]>::from_hex(&val) {
|
||||
file.sha256 = sha;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if file.sha256 == [0; 32] {
|
||||
file.sha256 = hash_file(path).map_err(|error| FileError::Hash {
|
||||
path: path.into(),
|
||||
error,
|
||||
})?;
|
||||
xattr::set(
|
||||
path,
|
||||
"user.checksum.sha256",
|
||||
hex::encode(file.sha256).as_bytes(),
|
||||
)
|
||||
.map_err(|error| FileError::StoreHash {
|
||||
path: path.into(),
|
||||
error,
|
||||
})?;
|
||||
}
|
||||
|
||||
Ok(file)
|
||||
}
|
||||
|
||||
|
|
@ -74,6 +104,51 @@ impl FileInfo {
|
|||
.map(|(parent, _)| parent)
|
||||
.unwrap_or("")
|
||||
}
|
||||
|
||||
pub fn is_duplicate(&self, other: impl AsRef<Path>) -> Result<bool, FileError> {
|
||||
let other = other.as_ref();
|
||||
let other_stat = other.metadata().map_err(|error| FileError::Stat {
|
||||
path: other.into(),
|
||||
error,
|
||||
})?;
|
||||
|
||||
if other_stat.size() != self.size {
|
||||
return Ok(false);
|
||||
}
|
||||
Ok(self.sha256
|
||||
== load_or_calculate_hash(other).map_err(|error| FileError::Hash {
|
||||
path: other.into(),
|
||||
error,
|
||||
})?)
|
||||
}
|
||||
}
|
||||
|
||||
fn load_or_calculate_hash(path: impl AsRef<Path>) -> Result<[u8; 32], std::io::Error> {
|
||||
let path = path.as_ref();
|
||||
if let Ok(Some(val)) = xattr::get(path, "user.checksum.sha256") {
|
||||
if let Ok(Ok(hash)) = String::from_utf8(val).as_deref().map(<[u8; 32]>::from_hex) {
|
||||
return Ok(hash);
|
||||
}
|
||||
}
|
||||
|
||||
let hash = hash_file(path)?;
|
||||
xattr::set(path, "user.checksum.sha256", hex::encode(hash).as_bytes())?;
|
||||
Ok(hash)
|
||||
}
|
||||
|
||||
fn hash_file(path: impl AsRef<Path>) -> Result<[u8; 32], std::io::Error> {
|
||||
let mut file = File::open(path)?;
|
||||
let mut buffer = [0u8; 8192];
|
||||
let mut hasher = Sha256::new();
|
||||
|
||||
loop {
|
||||
let count = file.read(&mut buffer)?;
|
||||
if count == 0 {
|
||||
break;
|
||||
}
|
||||
hasher.update(&buffer[..count]);
|
||||
}
|
||||
Ok(hasher.finalize().0)
|
||||
}
|
||||
|
||||
#[derive(Debug, Error)]
|
||||
|
|
@ -85,4 +160,14 @@ pub enum FileError {
|
|||
path: PathBuf,
|
||||
error: std::io::Error,
|
||||
},
|
||||
#[error("Failed to hash: {}", path.display())]
|
||||
Hash {
|
||||
path: PathBuf,
|
||||
error: std::io::Error,
|
||||
},
|
||||
#[error("Failed to store hash for: {}", path.display())]
|
||||
StoreHash {
|
||||
path: PathBuf,
|
||||
error: std::io::Error,
|
||||
},
|
||||
}
|
||||
|
|
|
|||
139
src/main.rs
139
src/main.rs
|
|
@ -5,10 +5,10 @@ use clap::builder::styling::{AnsiColor, Effects};
|
|||
use clap::builder::Styles;
|
||||
use clap::{Parser, Subcommand};
|
||||
use main_error::MainResult;
|
||||
use notify_debouncer_full::notify::event::{AccessKind, AccessMode};
|
||||
use notify_debouncer_full::notify::event::{AccessKind, AccessMode, ModifyKind, RenameMode};
|
||||
use notify_debouncer_full::notify::{EventKind, RecursiveMode};
|
||||
use notify_debouncer_full::{new_debouncer, DebounceEventResult};
|
||||
use std::fs::{copy, create_dir_all, remove_file, rename};
|
||||
use std::fs::{copy, create_dir_all, read_dir, remove_file, rename};
|
||||
use std::io::ErrorKind;
|
||||
use std::os::unix::fs::symlink;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
|
@ -63,7 +63,7 @@ fn main() -> MainResult {
|
|||
match args.command {
|
||||
Commands::File { path } => {
|
||||
let file = FileInfo::load(path)?;
|
||||
handle_file(&file, &config.rule);
|
||||
handle_file(&file, &config.rule, false);
|
||||
}
|
||||
Commands::Watch { path, recursive } => {
|
||||
let path = normalize_path(path);
|
||||
|
|
@ -87,7 +87,12 @@ fn main() -> MainResult {
|
|||
}
|
||||
})?;
|
||||
for res in rx {
|
||||
handle_watch_event(res, &rules, symlink.as_deref());
|
||||
handle_watch_event(
|
||||
res,
|
||||
&rules,
|
||||
symlink.as_deref(),
|
||||
config.watch.remove_duplicates,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -95,24 +100,55 @@ fn main() -> MainResult {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn handle_watch_event(result: DebounceEventResult, rules: &[Rule], link_target: Option<&str>) {
|
||||
fn handle_watch_event(
|
||||
result: DebounceEventResult,
|
||||
rules: &[Rule],
|
||||
link_target: Option<&str>,
|
||||
remove_duplicates: bool,
|
||||
) {
|
||||
let handle_path = |path: &Path| {
|
||||
// give originfox time to set xattr
|
||||
sleep(Duration::from_millis(200));
|
||||
if is_part(path) {
|
||||
debug!("skipping part file");
|
||||
return;
|
||||
}
|
||||
|
||||
match FileInfo::load(path) {
|
||||
Ok(file) => maybe_link(
|
||||
handle_file(&file, rules, remove_duplicates).as_deref(),
|
||||
link_target,
|
||||
),
|
||||
Err(error) => {
|
||||
error!(%error, "failed to load file info");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
match result {
|
||||
Ok(events) => {
|
||||
for event in events {
|
||||
if event.kind == EventKind::Access(AccessKind::Close(AccessMode::Write)) {
|
||||
for path in &event.paths {
|
||||
debug!("write event for {}", path.display());
|
||||
// give originfox time to set xattr
|
||||
sleep(Duration::from_millis(200));
|
||||
match FileInfo::load(path) {
|
||||
Ok(file) => {
|
||||
maybe_link(handle_file(&file, rules).as_deref(), link_target)
|
||||
}
|
||||
Err(error) => {
|
||||
error!(%error, "failed to load file info");
|
||||
}
|
||||
match event.kind {
|
||||
EventKind::Access(AccessKind::Close(AccessMode::Write)) => {
|
||||
for path in &event.paths {
|
||||
debug!("write event for {}", path.display());
|
||||
handle_path(path);
|
||||
}
|
||||
}
|
||||
|
||||
EventKind::Modify(ModifyKind::Name(RenameMode::Both)) => {
|
||||
if event.paths.len() == 2 {
|
||||
let from = &event.paths[0];
|
||||
let to = &event.paths[1];
|
||||
debug!("rename event for {} -> {}", from.display(), to.display());
|
||||
if is_part(from) && !is_part(to) {
|
||||
handle_path(to);
|
||||
}
|
||||
} else {
|
||||
error!("Invalid rename event");
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -124,7 +160,11 @@ fn handle_watch_event(result: DebounceEventResult, rules: &[Rule], link_target:
|
|||
}
|
||||
}
|
||||
|
||||
fn maybe_link(source: Option<&str>, target: Option<&str>) {
|
||||
/// Returns `true` when the extension of `path` is exactly `part`.
fn is_part(path: &Path) -> bool {
    match path.extension() {
        Some(extension) => extension.to_str() == Some("part"),
        None => false,
    }
}
|
||||
|
||||
fn maybe_link(source: Option<&Path>, target: Option<&str>) {
|
||||
if let (Some(source), Some(target)) = (source, target) {
|
||||
if Path::new(target).exists() {
|
||||
if let Err(error) = remove_file(target) {
|
||||
|
|
@ -134,7 +174,7 @@ fn maybe_link(source: Option<&str>, target: Option<&str>) {
|
|||
}
|
||||
match symlink(source, target) {
|
||||
Ok(()) => {
|
||||
info!(to = target, from = source, "created symlink");
|
||||
info!(to = target, from = %source.display(), "created symlink");
|
||||
}
|
||||
Err(error) => {
|
||||
error!(%error, "failed to link target");
|
||||
|
|
@ -154,10 +194,22 @@ fn match_file(file: &FileInfo, rules: &[Rule]) -> Option<RuleMatch> {
|
|||
}
|
||||
|
||||
#[instrument(skip_all, fields(file = file.path))]
|
||||
fn handle_file(file: &FileInfo, rules: &[Rule]) -> Option<String> {
|
||||
fn handle_file(file: &FileInfo, rules: &[Rule], remove_duplicates: bool) -> Option<PathBuf> {
|
||||
let Some(result) = match_file(file, rules) else {
|
||||
info!("no matches");
|
||||
return None;
|
||||
info!(url = file.url, "removing duplicate");
|
||||
|
||||
if remove_duplicates {
|
||||
let parent = Path::new(&file.path).parent().unwrap();
|
||||
if let Some(duplicate) = has_duplicate(file, parent) {
|
||||
info!(url = file.url, diplicate = %duplicate.display(), "removing duplicate");
|
||||
if let Err(error) = remove_file(&file.path) {
|
||||
error!(%error, "failed to remove duplicate");
|
||||
}
|
||||
return Some(duplicate);
|
||||
}
|
||||
}
|
||||
|
||||
return Some(file.path.clone().into());
|
||||
};
|
||||
|
||||
let parent = result.target.as_deref().unwrap_or_else(|| file.parent());
|
||||
|
|
@ -168,12 +220,22 @@ fn handle_file(file: &FileInfo, rules: &[Rule]) -> Option<String> {
|
|||
return None;
|
||||
}
|
||||
|
||||
if remove_duplicates {
|
||||
if let Some(duplicate) = has_duplicate(file, parent) {
|
||||
info!(url = file.url, diplicate = %duplicate.display(), "removing duplicate");
|
||||
if let Err(error) = remove_file(&file.path) {
|
||||
error!(%error, "failed to remove duplicate");
|
||||
}
|
||||
return Some(duplicate);
|
||||
}
|
||||
}
|
||||
|
||||
let target = format!("{parent}/{name}");
|
||||
|
||||
match cross_storage_move(&file.path, &target) {
|
||||
Ok(()) => {
|
||||
info!(target, "moved file");
|
||||
Some(target)
|
||||
Some(target.into())
|
||||
}
|
||||
Err(error) => {
|
||||
info!(target, ?error, "failed to moved file");
|
||||
|
|
@ -182,6 +244,37 @@ fn handle_file(file: &FileInfo, rules: &[Rule]) -> Option<String> {
|
|||
}
|
||||
}
|
||||
|
||||
fn has_duplicate(file: &FileInfo, dir: impl AsRef<Path>) -> Option<PathBuf> {
|
||||
let dir = match read_dir(dir) {
|
||||
Ok(dir) => dir,
|
||||
Err(error) => {
|
||||
error!(%error, "failed to list target directory");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
for entry in dir.flatten() {
|
||||
let path = entry.path();
|
||||
if path.to_str() == Some(file.path.as_str()) {
|
||||
continue;
|
||||
}
|
||||
if !path.is_file() {
|
||||
continue;
|
||||
}
|
||||
|
||||
match file.is_duplicate(&path) {
|
||||
Ok(is_dup) => {
|
||||
if is_dup {
|
||||
return Some(path);
|
||||
}
|
||||
}
|
||||
Err(error) => {
|
||||
error!(%error, path = %path.display(), "failed to determine if a file is duplicate");
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn cross_storage_move(source: impl AsRef<Path>, target: impl AsRef<Path>) -> std::io::Result<()> {
|
||||
let source = source.as_ref();
|
||||
let target = target.as_ref();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue