Skip to main content

fmf_core\index/
builder.rs

1use parking_lot::Mutex;
2
3use super::frn::FrnIndex;
4use super::{EncodedEntry, EntryId, Frn, NO_PARENT, RawEntry, SortKey, VolumeIndex};
5
6/// Two-pass builder for the initial scan: collect everything, then resolve
7/// parents and sort the permutations (scan order ≠ parent-before-child).
8pub struct VolumeIndexBuilder {
9    idx: VolumeIndex,
10    parent_frns: Vec<Frn>,
11}
12
13/// Stage timings of [`VolumeIndexBuilder::finish_timed`], in milliseconds.
14#[derive(Debug, Default, Clone, Copy)]
15pub struct FinishTimings {
16    /// Parent resolution + EXCLUDED propagation.
17    pub build_ms: u64,
18    /// The name-permutation sort.
19    pub sort_ms: u64,
20}
21
22impl VolumeIndexBuilder {
23    /// `volume_label` is the root display name, e.g. `C:`.
24    /// `root_record` is the MFT record number of the root directory (5 on NTFS).
25    #[must_use]
26    pub fn new(volume_label: &str, root_record: u64) -> Self {
27        let mut idx = VolumeIndex {
28            lower_pool: Vec::new(),
29            orig_pool: Vec::new(),
30            orig_off: Vec::new(),
31            name_off: Vec::new(),
32            name_len: Vec::new(),
33            parent: Vec::new(),
34            size_lo: Vec::new(),
35            size_ovf: rustc_hash::FxHashMap::default(),
36            mtime: Vec::new(),
37            frn: Vec::new(),
38            flag: Vec::new(),
39            frn_index: FrnIndex::default(),
40            perm_name: Vec::new(),
41            content_generation: 0,
42            structural_generation: 0,
43            dir_topology_generation: 0,
44            tombstones: 0,
45            dead_name_bytes: 0,
46            derived_cache: Mutex::new(None),
47        };
48        let units: Vec<u16> = volume_label.encode_utf16().collect();
49        // Parents are provisional ROOT during the build — finish() resolves
50        // them all in one parallel pass once the FRN index exists.
51        let root = idx.push_raw(
52            &RawEntry {
53                parent_frn: Frn(u64::MAX), // resolves to nothing → NO_PARENT below
54                frn: Frn(root_record),
55                name_utf16: &units,
56                is_dir: true,
57                is_reparse: false,
58                is_hidden: false,
59                is_system: false,
60                size: 0,
61                mtime: 0,
62            },
63            VolumeIndex::ROOT,
64        );
65        debug_assert_eq!(root, VolumeIndex::ROOT);
66        idx.parent[root as usize] = NO_PARENT;
67        Self {
68            idx,
69            parent_frns: vec![Frn(u64::MAX)],
70        }
71    }
72
73    /// Append a scanned entry with a provisional ROOT parent; the real
74    /// parent is resolved later in [`Self::finish`] once the FRN index exists.
75    pub fn push(&mut self, e: RawEntry) {
76        let id = self.idx.push_raw(&e, VolumeIndex::ROOT);
77        self.parent_frns.push(e.parent_frn);
78        debug_assert_eq!(self.parent_frns.len(), id as usize + 1);
79    }
80
81    /// Append an entry whose name was WTF-8 encoded off-thread (parallel
82    /// scan workers). Identical semantics to [`Self::push`].
83    pub fn push_encoded(&mut self, e: EncodedEntry) {
84        let id = self.idx.push_encoded(&e, VolumeIndex::ROOT);
85        self.parent_frns.push(e.parent_frn);
86        debug_assert_eq!(self.parent_frns.len(), id as usize + 1);
87    }
88
89    /// The number of entries pushed so far (including the root).
90    pub const fn len(&self) -> usize {
91        self.idx.len()
92    }
93
94    /// True when no entries have been pushed (never true after [`Self::new`],
95    /// which seeds the root).
96    pub const fn is_empty(&self) -> bool {
97        self.idx.is_empty()
98    }
99
100    /// Run the two-pass finalization and return the queryable [`VolumeIndex`],
101    /// discarding the stage timings.
102    pub fn finish(self) -> VolumeIndex {
103        self.finish_timed().0
104    }
105
106    /// Run the two-pass finalization (resolve parents, propagate EXCLUDED,
107    /// build the name sort) and return the [`VolumeIndex`] with stage timings.
108    pub fn finish_timed(mut self) -> (VolumeIndex, FinishTimings) {
109        use rayon::prelude::*;
110
111        let mut timings = FinishTimings::default();
112        let t = std::time::Instant::now();
113
114        // Pass 1.5: the FRN index, in one parallel sort (ADR-0005).
115        self.idx.frn_index = FrnIndex::build(&self.idx.frn, &self.idx.flag);
116
117        // Pass 2: resolve parents now that every record is findable.
118        // Read-only lookups, one write per slot — embarrassingly parallel.
119        {
120            let VolumeIndex {
121                frn_index,
122                parent,
123                frn,
124                flag,
125                ..
126            } = &mut self.idx;
127            let parent_frns = &self.parent_frns;
128            parent
129                .par_iter_mut()
130                .enumerate()
131                .skip(1) // the root keeps NO_PARENT
132                .for_each(|(i, p)| {
133                    *p = frn_index
134                        .lookup(parent_frns[i].record(), frn, flag)
135                        .unwrap_or(VolumeIndex::ROOT);
136                });
137        }
138
139        // Pass 3: propagate EXCLUDED down resolved parent chains. `done`
140        // marks finalized entries; the stack unwinds each unresolved chain
141        // top-down exactly once → O(n) total.
142        {
143            let n = self.idx.len();
144            let mut done = vec![false; n];
145            let root = VolumeIndex::ROOT as usize;
146            self.idx.recompute_excluded(VolumeIndex::ROOT);
147            done[root] = true;
148            let mut stack: Vec<EntryId> = Vec::new();
149            for start in 0..n as u32 {
150                let mut cur = start;
151                while !done[cur as usize] && stack.len() < 4096 {
152                    stack.push(cur);
153                    cur = self.idx.parent[cur as usize];
154                    if cur == NO_PARENT {
155                        break;
156                    }
157                }
158                while let Some(id) = stack.pop() {
159                    self.idx.recompute_excluded(id);
160                    done[id as usize] = true;
161                }
162            }
163        }
164
165        timings.build_ms = t.elapsed().as_millis() as u64;
166        let t = std::time::Instant::now();
167
168        // Name order only — the default sort, needed before the volume can
169        // serve its first query. Size/mtime orders build lazily on the
170        // first sorted query (query::memo, ADR-0006).
171        let mut perm_name: Vec<EntryId> = (0..self.idx.len() as u32).collect();
172        let idx = &self.idx;
173        perm_name.par_sort_unstable_by(|&a, &b| idx.cmp_by(SortKey::Name, a, b));
174        self.idx.perm_name = perm_name;
175        timings.sort_ms = t.elapsed().as_millis() as u64;
176        (self.idx, timings)
177    }
178}
179
180#[cfg(test)]
181mod tests {
182    use super::*;
183    use crate::index::testutil::{build_sample, raw, raw_attr, u16s};
184
185    #[test]
186    fn parents_resolve_across_scan_order() {
187        let idx = build_sample();
188        let note = idx.entry_by_record(100).unwrap();
189        let docs = idx.entry_by_record(50).unwrap();
190        assert_eq!(idx.parent(note), docs);
191        assert_eq!(idx.parent(docs), VolumeIndex::ROOT);
192    }
193
194    #[test]
195    fn orphan_attaches_to_root() {
196        let mut b = VolumeIndexBuilder::new("C:", 5);
197        let name = u16s("lost.txt");
198        b.push(raw(7, 999_999, &name, false, 1, 1));
199        let idx = b.finish();
200        let id = idx.entry_by_record(7).unwrap();
201        assert_eq!(idx.parent(id), VolumeIndex::ROOT);
202    }
203
204    #[test]
205    fn excluded_propagates_down_branches() {
206        // C:\ ├─ $Recycle.Bin\(system dir) │ └─ sub\ │   └─ ghost.txt(plain)
207        //     ├─ .git(hidden file)  └─ normal.txt
208        let bin = u16s("$Recycle.Bin");
209        let sub = u16s("sub");
210        let ghost = u16s("ghost.txt");
211        let git = u16s(".git");
212        let normal = u16s("normal.txt");
213        let mut b = VolumeIndexBuilder::new("C:", 5);
214        // Push the deep child FIRST so propagation must survive scan order.
215        b.push(raw_attr(30, 20, &ghost, false, false, false));
216        b.push(raw_attr(20, 10, &sub, true, false, false));
217        b.push(raw_attr(10, 5, &bin, true, false, true)); // system
218        b.push(raw_attr(40, 5, &git, false, true, false)); // hidden
219        b.push(raw_attr(50, 5, &normal, false, false, false));
220        let idx = b.finish();
221
222        for (rec, want) in [(10, true), (20, true), (30, true), (40, true), (50, false)] {
223            let id = idx.entry_by_record(rec).unwrap();
224            assert_eq!(idx.is_excluded(id), want, "record {rec}");
225        }
226    }
227}