Skip to main content

fmf_core\index/
compact.rs

1//! Compaction (M2): rebuild the index without tombstoned rows and without
2//! the name bytes renames abandoned in the pools. Without it both grow
3//! forever under USN traffic and eventually eat the B/entry RAM budget.
4//!
5//! The whole trick is the remapping order: live entries keep their relative
6//! id order (old-id ascending → new ids 0..live). Every sorted structure
7//! orders by (key, id) with identical keys on both sides, so filtering the
8//! dead and renumbering the survivors preserves sortedness — `perm_name`
9//! and the FRN index copy over in O(n) with **no re-sort** (ADR-0009).
10//!
11//! Swap-in goes through `VolumeSlot::install_index`, which bumps the
12//! structural generation: open result handles go hard-stale and the UI
13//! re-issues its query (docs/ARCHITECTURE.md, generation 2-tier).
14
15use parking_lot::Mutex;
16
17use super::{EntryId, NO_PARENT, VolumeIndex};
18
19/// Below this size the garbage can't be worth a rebuild.
20const COMPACT_MIN_ENTRIES: usize = 100_000;
21/// Tombstone share that triggers compaction (matches the `OffsetTable`'s
22/// stale-rebuild instinct: past ~1/8 dead weight, rebuilding wins).
23const COMPACT_TOMBSTONE_RATIO: f64 = 0.125;
24/// Reclaimable pool bytes that trigger compaction regardless of ratio.
25const COMPACT_DEAD_NAME_BYTES: u64 = 32 << 20;
26
27impl VolumeIndex {
28    /// Should this index be compacted? (Policy entry point for the volume
29    /// thread, once per applied USN batch.)
30    pub fn compaction_due(&self) -> bool {
31        self.compaction_due_past(COMPACT_MIN_ENTRIES)
32    }
33
34    fn compaction_due_past(&self, min_entries: usize) -> bool {
35        self.len() >= min_entries.max(1)
36            && (self.tombstone_ratio() > COMPACT_TOMBSTONE_RATIO
37                || self.dead_name_bytes > COMPACT_DEAD_NAME_BYTES)
38    }
39
40    /// A compacted copy: live entries only, pools rebuilt without garbage,
41    /// permutation and FRN index remapped without re-sorting. Children of a
42    /// tombstoned directory attach to the root — the same orphan policy as
43    /// `push_raw`. The copy starts at generation zero on all three counters;
44    /// `install_index` carries the structural generation forward.
45    ///
46    /// Call only at a batch boundary (the FRN index must cover every entry —
47    /// `merge_new_into_permutations` just ran).
48    #[must_use]
49    pub fn compacted(&self) -> Self {
50        let n = self.len();
51        // Old → new id; NO_PARENT marks the dead.
52        let mut remap: Vec<EntryId> = vec![NO_PARENT; n];
53        let mut live: u32 = 0;
54        for id in 0..n as u32 {
55            if self.is_live(id) {
56                remap[id as usize] = live;
57                live += 1;
58            }
59        }
60        debug_assert!(
61            self.is_live(Self::ROOT),
62            "the root entry is never tombstoned"
63        );
64
65        let mut out = Self {
66            lower_pool: Vec::with_capacity(self.lower_pool.len()),
67            orig_pool: Vec::with_capacity(self.orig_pool.len()),
68            orig_off: Vec::with_capacity(live as usize),
69            name_off: Vec::with_capacity(live as usize),
70            name_len: Vec::with_capacity(live as usize),
71            parent: Vec::with_capacity(live as usize),
72            size_lo: Vec::with_capacity(live as usize),
73            size_ovf: rustc_hash::FxHashMap::default(),
74            mtime: Vec::with_capacity(live as usize),
75            frn: Vec::with_capacity(live as usize),
76            flag: Vec::with_capacity(live as usize),
77            frn_index: self.frn_index.compact(&remap, live),
78            perm_name: Vec::with_capacity(live as usize),
79            content_generation: 0,
80            structural_generation: 0,
81            dir_topology_generation: 0,
82            tombstones: 0,
83            dead_name_bytes: 0,
84            derived_cache: Mutex::new(None),
85        };
86
87        for id in 0..n as u32 {
88            if !self.is_live(id) {
89                continue;
90            }
91            out.name_off.push(out.lower_pool.len() as u32);
92            out.name_len.push(self.name_len[id as usize]);
93            out.lower_pool.extend_from_slice(self.lower_name(id));
94            out.orig_off.push(if self.is_fold_identical(id) {
95                u32::MAX
96            } else {
97                let off = out.orig_pool.len() as u32;
98                out.orig_pool.extend_from_slice(self.name(id));
99                off
100            });
101            let p = self.parent[id as usize];
102            out.parent.push(if p == NO_PARENT {
103                NO_PARENT // the root
104            } else {
105                match remap[p as usize] {
106                    NO_PARENT => Self::ROOT, // orphaned by a dead dir
107                    new_p => new_p,
108                }
109            });
110            out.push_size(self.size(id));
111            out.mtime.push(self.mtime[id as usize]);
112            out.frn.push(self.frn[id as usize]);
113            out.flag.push(self.flag[id as usize]);
114        }
115
116        out.perm_name = self
117            .perm_name
118            .iter()
119            .filter_map(|&id| match remap[id as usize] {
120                NO_PARENT => None,
121                new_id => Some(new_id),
122            })
123            .collect();
124
125        out.shrink_to_fit();
126        out
127    }
128}
129
130#[cfg(test)]
131mod tests {
132    use super::*;
133    use crate::index::SortKey;
134    use crate::index::testutil::{build_sample, raw, u16s};
135
136    /// Garbage from renames + deletes, then compact: every live record
137    /// resolves identically, paths and names byte-match, sorted structures
138    /// hold without re-sorting, counters reset.
139    #[test]
140    fn compaction_drops_garbage_and_preserves_live_entries() {
141        let mut idx = build_sample();
142        // Rename storm on 100 (tombstone churn + pool garbage), one delete,
143        // one in-place dir rename (pool garbage without a tombstone), one
144        // ≥4GiB file (size-overflow remap), one cased name (orig pool).
145        for i in 0..4u64 {
146            let first_new = idx.len() as u32;
147            let name = u16s(&format!("storm_{i}.TXT"));
148            idx.upsert(&raw(100, 50, &name, false, i, i as i64));
149            idx.merge_new_into_permutations(first_new);
150        }
151        let first_new = idx.len() as u32;
152        let huge = u16s("Huge.ISO");
153        idx.upsert(&raw(700, 50, &huge, false, (7u64 << 30) + 5, 9));
154        idx.merge_new_into_permutations(first_new);
155        idx.delete(60);
156        let dir2 = u16s("docs_v2");
157        idx.rename_dir_in_place(50, &dir2, 5);
158        idx.merge_new_into_permutations(idx.len() as u32);
159
160        let live_before = idx.live_len();
161        let expect: Vec<(u64, Vec<u8>, Vec<u8>, u64)> = [5u64, 50, 100, 700]
162            .iter()
163            .map(|&rec| {
164                let id = idx.entry_by_record(rec).unwrap();
165                let mut p = Vec::new();
166                idx.append_path(id, &mut p);
167                (rec, idx.name(id).to_vec(), p, idx.size(id))
168            })
169            .collect();
170
171        let c = idx.compacted();
172        assert_eq!(c.len(), live_before);
173        assert_eq!(c.live_len(), live_before);
174        // After compaction tombstones == 0, so the ratio is exactly 0.0.
175        #[expect(clippy::float_cmp, reason = "0 tombstones yields an exact 0.0 ratio")]
176        {
177            assert_eq!(c.tombstone_ratio(), 0.0);
178        }
179        assert_eq!(c.stats("C:").dead_name_bytes, 0);
180        // Pools shrank: the storm's abandoned bytes are gone.
181        assert!(c.stats("C:").lower_pool_bytes < idx.stats("C:").lower_pool_bytes);
182
183        for (rec, name, path, size) in &expect {
184            let id = c.entry_by_record(*rec).unwrap_or_else(|| {
185                panic!("record {rec} lost in compaction");
186            });
187            assert_eq!(c.name(id), &name[..], "record {rec}");
188            let mut p = Vec::new();
189            c.append_path(id, &mut p);
190            assert_eq!(&p, path, "record {rec}");
191            assert_eq!(c.size(id), *size, "record {rec}");
192        }
193        assert_eq!(c.entry_by_record(60), None, "deleted record stays gone");
194
195        // perm_name is a strictly sorted complete permutation — without
196        // having been re-sorted.
197        let perm = c.name_permutation();
198        assert_eq!(perm.len(), c.len());
199        let mut seen: Vec<EntryId> = perm.to_vec();
200        seen.sort_unstable();
201        assert_eq!(seen, (0..c.len() as u32).collect::<Vec<_>>());
202        for w in perm.windows(2) {
203            assert!(c.cmp_by(SortKey::Name, w[0], w[1]).is_lt());
204        }
205
206        // Round-trips through a snapshot like any other index.
207        let mut buf = Vec::new();
208        c.write_snapshot(&mut buf, 1, 2).unwrap();
209        let (loaded, _, _) = VolumeIndex::read_snapshot(&mut buf.as_slice()).unwrap();
210        assert_eq!(loaded.len(), c.len());
211    }
212
213    /// Children of a tombstoned directory attach to the root (`push_raw`'s
214    /// orphan policy) instead of dangling.
215    #[test]
216    fn compaction_reattaches_orphans_of_dead_dirs() {
217        let mut idx = build_sample();
218        idx.delete(50); // "docs", parent of record 100
219        idx.merge_new_into_permutations(idx.len() as u32);
220        let c = idx.compacted();
221        let note = c.entry_by_record(100).unwrap();
222        assert_eq!(c.parent(note), VolumeIndex::ROOT);
223        let mut p = Vec::new();
224        c.append_path(note, &mut p);
225        assert_eq!(p, b"C:\\Note.TXT");
226    }
227
228    #[test]
229    fn compaction_policy_thresholds() {
230        let mut idx = build_sample();
231        assert!(
232            !idx.compaction_due_past(1),
233            "clean index must not trigger on garbage thresholds"
234        );
235        idx.delete(60);
236        // 1 of 4 entries dead = 25% > 12.5%.
237        assert!(idx.compaction_due_past(1));
238        assert!(
239            !idx.compaction_due(),
240            "tiny volumes never trigger (min-entries floor)"
241        );
242    }
243}