refactor(storage): read dedup mtimes once instead of in sort closure
sort_by may call the comparator many times, so the previous tiebreaker re-read each duplicate file's metadata on every comparison. With N duplicates that's O(N log N) stat calls, and the ordering could flip mid-sort if a file was touched concurrently. Snapshot mtime per file up front and sort on the cached values.
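A minimal standalone sketch of the same decorate-then-sort idea (the newest() helper and its argument are hypothetical, not code from this repository): stat each path once up front, sort on the cached mtime, and the comparator never touches the filesystem.

    use std::fs;
    use std::path::PathBuf;
    use std::time::SystemTime;

    // Hypothetical helper: return the most recently modified of the given paths.
    // Each mtime is read exactly once, so the sort cannot observe a file being
    // touched halfway through and flip its ordering.
    fn newest(paths: Vec<PathBuf>) -> Option<PathBuf> {
        let mut with_mtime: Vec<(PathBuf, Option<SystemTime>)> = paths
            .into_iter()
            .map(|p| {
                let mtime = fs::metadata(&p).and_then(|m| m.modified()).ok();
                (p, mtime)
            })
            .collect();
        // Newest first; paths whose metadata could not be read sort last.
        with_mtime.sort_by(|a, b| b.1.cmp(&a.1));
        with_mtime.into_iter().next().map(|(p, _)| p)
    }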
This commit is contained in:
parent 4e8f7c4536
commit 937b6c2c7d
@@ -457,26 +457,37 @@ impl Storage for FileSystemStorage {
         }
 
         let mut tasks = Vec::new();
-        for (_id, mut entries) in by_id {
-            if entries.len() > 1 {
-                entries.sort_by(|a, b| {
+        for (_id, entries) in by_id {
+            let winner = if entries.len() > 1 {
+                // Read mtime once per file so sort_by doesn't hit the filesystem
+                // O(n log n) times and can't produce inconsistent orderings if a
+                // file is touched mid-sort.
+                let mut with_mtime: Vec<(PathBuf, Task, Option<std::time::SystemTime>)> = entries
+                    .into_iter()
+                    .map(|(p, t)| {
+                        let mtime = fs::metadata(&p).and_then(|m| m.modified()).ok();
+                        (p, t, mtime)
+                    })
+                    .collect();
+                with_mtime.sort_by(|a, b| {
                     // Primary: highest version first
                     let version_cmp = b.1.version.cmp(&a.1.version);
                     if version_cmp != std::cmp::Ordering::Equal {
                         return version_cmp;
                     }
                     // Tiebreaker: most recently modified file first
-                    let mtime_a = fs::metadata(&a.0).and_then(|m| m.modified()).ok();
-                    let mtime_b = fs::metadata(&b.0).and_then(|m| m.modified()).ok();
-                    mtime_b.cmp(&mtime_a)
+                    b.2.cmp(&a.2)
                 });
-                for (stale_path, _) in entries.drain(1..) {
+                for (stale_path, _, _) in with_mtime.drain(1..) {
                     if let Err(e) = fs::remove_file(&stale_path) {
                         eprintln!("Warning: failed to remove stale duplicate task file {:?}: {}", stale_path, e);
                     }
                 }
-            }
-            let (_, task) = entries.into_iter().next()
+                with_mtime.into_iter().next().map(|(_, t, _)| t)
+            } else {
+                entries.into_iter().next().map(|(_, t)| t)
+            };
+            let task = winner
                 .ok_or_else(|| Error::InvalidData("Empty dedup entries for task".to_string()))?;
             tasks.push(task);
         }