Skip to main content

fmf_core\index/
core.rs

1use std::any::Any;
2use std::sync::Arc;
3
4use parking_lot::Mutex;
5use rustc_hash::FxHashMap;
6
7use super::frn::FrnIndex;
8use super::{EntryId, Frn, NO_PARENT, RecordNo, SortKey, flags};
9
10/// In-memory per-volume index.
11///
12/// Struct-of-arrays entry columns, two string pools sharing one offset/length
13/// table, an FRN map, and the always-sorted name permutation
14/// (docs/ARCHITECTURE.md). One instance per indexed volume.
15pub struct VolumeIndex {
16    /// The one contiguous, sweepable pool: every entry's *folded* name at
17    /// `name_off..name_off+name_len`. Most names fold to themselves
18    /// (ADR-0004), so the original spelling is stored only where it differs
19    /// — in `orig_pool` at `orig_off`, same length (the fold is
20    /// length-preserving, wtf8.rs). `orig_off == u32::MAX` means the folded
21    /// bytes *are* the original.
22    pub(super) lower_pool: Vec<u8>,
23    pub(super) orig_pool: Vec<u8>,
24    pub(super) orig_off: Vec<u32>,
25    pub(super) name_off: Vec<u32>,
26    pub(super) name_len: Vec<u16>,
27    pub(super) parent: Vec<EntryId>,
28    /// File sizes < `u32::MAX`, 4 bytes per entry; `u32::MAX` is the sentinel
29    /// for the overflow map (≥4GiB files, ADR-0007). Read through
30    /// [`VolumeIndex::size`].
31    pub(super) size_lo: Vec<u32>,
32    pub(super) size_ovf: FxHashMap<EntryId, u64>,
33    pub(super) mtime: Vec<i64>,
34    pub(super) frn: Vec<u64>,
35    pub(super) flag: Vec<u8>,
36    pub(super) frn_index: FrnIndex,
37    /// The one always-maintained permutation: name order is the default
38    /// sort and the merge target of every USN batch. Size/mtime orders are
39    /// lazily derived caches (`query::memo::{SizePerm`, `MtimePerm`}) — built on
40    /// the first sorted query, extended per generation, never persisted.
41    pub(super) perm_name: Vec<EntryId>,
42    pub(super) content_generation: u64,
43    pub(super) structural_generation: u64,
44    /// Bumped whenever an existing directory's name or parent changes —
45    /// the two mutations that invalidate memoized descendant paths in ways
46    /// an append-only extension cannot express. Plain appends/deletes/stat
47    /// updates leave it untouched.
48    pub(super) dir_topology_generation: u64,
49    pub(super) tombstones: u32,
50    /// Abandoned name bytes across both pools (tombstoned rows and in-place
51    /// dir renames leave their old bytes behind: folded copy always, the
52    /// original copy when one existed). Compaction-trigger input. Not
53    /// persisted — recomputed from tombstones on restore, so rename gaps
54    /// make it a lower bound there.
55    pub(super) dead_name_bytes: u64,
56    /// Query-independent caches derived from index content (dir-path memo,
57    /// pool offset table, …) keyed by `content_generation` and value type.
58    /// Type-erased so the index stays ignorant of query-module types.
59    pub(super) derived_cache: Mutex<Option<DerivedCache>>,
60}
61
62pub(super) type DerivedMap = FxHashMap<std::any::TypeId, Arc<dyn Any + Send + Sync>>;
63
64/// The previous generation's values stick around (`prev`) so incremental
65/// builders can extend them instead of starting over; a value is consumed
66/// (removed) the first time its type resolves under the new generation, and
67/// anything never consumed drops on the following generation change.
68pub(super) struct DerivedCache {
69    generation: u64,
70    current: DerivedMap,
71    prev: DerivedMap,
72}
73
74// ── Shared column accessors ──────────────────────────────────────────────
75// `VolumeIndex` owns its columns; `SortColumns` borrows them so permutation
76// maintenance can hold the `&mut` perm array alongside the keys. Both read an
77// entry's folded name and size identically — these free functions are the one
78// definition each delegates to, so the pair can never drift (the same hazard
79// `SortColumns`'s own doc cites for `cmp_by`).
80
81/// Folded name bytes of `id`, from a name pool and its offset/length columns.
82#[inline]
83fn pool_lower_name<'a>(
84    lower_pool: &'a [u8],
85    name_off: &[u32],
86    name_len: &[u16],
87    id: EntryId,
88) -> &'a [u8] {
89    let off = name_off[id as usize] as usize;
90    &lower_pool[off..off + name_len[id as usize] as usize]
91}
92
93/// Size of `id` read through the u32 column + overflow map (ADR-0007).
94#[inline]
95fn column_size(size_lo: &[u32], size_ovf: &FxHashMap<EntryId, u64>, id: EntryId) -> u64 {
96    match size_lo[id as usize] {
97        u32::MAX => size_ovf[&id],
98        v => v as u64,
99    }
100}
101
102impl VolumeIndex {
103    /// Total entry slots, live plus tombstoned (the column length).
104    pub const fn len(&self) -> usize {
105        self.name_off.len()
106    }
107
108    /// True when no entries have ever been appended.
109    pub const fn is_empty(&self) -> bool {
110        self.len() == 0
111    }
112
113    /// Live entry count: total slots minus tombstones.
114    pub const fn live_len(&self) -> usize {
115        self.len() - self.tombstones as usize
116    }
117
118    /// The volume root's [`EntryId`] (always slot 0).
119    pub const ROOT: EntryId = 0;
120
121    /// The original-spelling name. Fold-identical entries (most of them)
122    /// borrow straight from the folded pool — the bytes are the same.
123    #[inline]
124    pub fn name(&self, id: EntryId) -> &[u8] {
125        match self.orig_off[id as usize] {
126            u32::MAX => self.lower_name(id),
127            off => {
128                &self.orig_pool[off as usize..off as usize + self.name_len[id as usize] as usize]
129            }
130        }
131    }
132
133    /// The case-folded name bytes of `id` (ADR-0004), straight from the
134    /// folded pool — the form every matcher compares against.
135    #[inline]
136    pub fn lower_name(&self, id: EntryId) -> &[u8] {
137        pool_lower_name(&self.lower_pool, &self.name_off, &self.name_len, id)
138    }
139
140    /// True while `id` is a real entry — false once it has been tombstoned.
141    #[inline]
142    pub fn is_live(&self, id: EntryId) -> bool {
143        self.flag[id as usize] & flags::TOMBSTONE == 0
144    }
145
146    /// Hidden/system (or under such a branch) — skipped by default queries.
147    #[inline]
148    pub fn is_excluded(&self, id: EntryId) -> bool {
149        self.flag[id as usize] & flags::EXCLUDED != 0
150    }
151
152    /// True when `id` is a directory rather than a file.
153    #[inline]
154    pub fn is_dir(&self, id: EntryId) -> bool {
155        self.flag[id as usize] & flags::IS_DIR != 0
156    }
157
158    /// True when `id` is a reparse point (symlink, junction, mount point).
159    #[inline]
160    pub fn is_reparse(&self, id: EntryId) -> bool {
161        self.flag[id as usize] & flags::REPARSE != 0
162    }
163
164    /// File size of `id` in bytes, read through the u32 column and the
165    /// overflow map for ≥4 GiB files (ADR-0007).
166    #[inline]
167    pub fn size(&self, id: EntryId) -> u64 {
168        column_size(&self.size_lo, &self.size_ovf, id)
169    }
170
171    /// The single write path for sizes — keeps the column and the overflow
172    /// map consistent in both directions (a file can shrink back under the
173    /// sentinel).
174    pub(super) fn set_size(&mut self, id: EntryId, v: u64) {
175        if v >= u32::MAX as u64 {
176            self.size_lo[id as usize] = u32::MAX;
177            self.size_ovf.insert(id, v);
178        } else {
179            self.size_lo[id as usize] = v as u32;
180            self.size_ovf.remove(&id);
181        }
182    }
183
184    /// Append form of [`Self::set_size`] (column construction).
185    pub(super) fn push_size(&mut self, v: u64) {
186        if v >= u32::MAX as u64 {
187            let id = self.size_lo.len() as EntryId;
188            self.size_lo.push(u32::MAX);
189            self.size_ovf.insert(id, v);
190        } else {
191            self.size_lo.push(v as u32);
192        }
193    }
194
195    /// Last-modification time of `id` as a Windows FILETIME tick count.
196    #[inline]
197    pub fn mtime(&self, id: EntryId) -> i64 {
198        self.mtime[id as usize]
199    }
200
201    /// The [`EntryId`] of `id`'s parent directory ([`NO_PARENT`] at the root).
202    #[inline]
203    pub fn parent(&self, id: EntryId) -> EntryId {
204        self.parent[id as usize]
205    }
206
207    /// The NTFS File Reference Number of `id`.
208    #[inline]
209    pub fn frn(&self, id: EntryId) -> Frn {
210        Frn(self.frn[id as usize])
211    }
212
213    /// The live entry for a record number, if any. Pass a [`RecordNo`] (or a
214    /// raw record-number `u64`); derive one from a full reference with
215    /// [`Frn::record`] — the type stops a full FRN being mistaken for a key.
216    pub fn entry_by_record(&self, record: impl Into<RecordNo>) -> Option<EntryId> {
217        self.frn_index.lookup(record.into(), &self.frn, &self.flag)
218    }
219
220    // Raw pool access for the pool-scan query kernel (same crate only).
221    #[inline]
222    pub(crate) fn name_off_of(&self, id: EntryId) -> u32 {
223        self.name_off[id as usize]
224    }
225
226    #[inline]
227    pub(crate) fn name_len_of(&self, id: EntryId) -> usize {
228        self.name_len[id as usize] as usize
229    }
230
231    /// True when the entry's original spelling is its folded form — the
232    /// case-exact matchers' fast path: such a name can never contain a
233    /// needle with fold-unstable characters, and for fold-stable needles
234    /// the folded comparison *is* the exact comparison.
235    #[inline]
236    pub(crate) fn is_fold_identical(&self, id: EntryId) -> bool {
237        self.orig_off[id as usize] == u32::MAX
238    }
239
240    #[inline]
241    pub(crate) fn lower_pool_bytes(&self) -> &[u8] {
242        &self.lower_pool
243    }
244
245    /// The content generation — bumped by every USN batch; open result
246    /// handles stay readable across it (docs/ARCHITECTURE.md, generation 2-tier).
247    pub const fn content_generation(&self) -> u64 {
248        self.content_generation
249    }
250
251    /// The structural generation — bumped only by compaction/rebuild, which
252    /// hard-stales open result handles (docs/ARCHITECTURE.md, generation 2-tier).
253    pub const fn structural_generation(&self) -> u64 {
254        self.structural_generation
255    }
256
257    pub(crate) const fn dir_topology_generation(&self) -> u64 {
258        self.dir_topology_generation
259    }
260
261    /// Carry the structural generation across a rebuild: a freshly built
262    /// index replacing one whose generation was `prev` must read as strictly
263    /// newer, so open result handles go hard-stale (docs/ARCHITECTURE.md,
264    /// generation 2-tier). Compaction (M2) will reuse this.
265    pub(crate) const fn bump_structural_from(&mut self, prev: u64) {
266        self.structural_generation = prev + 1;
267    }
268
269    /// Return the cached content-derived value of type `T`, rebuilding it
270    /// with `build` when the content generation moved. All cached types are
271    /// invalidated together on a generation change.
272    pub(crate) fn cached_derived<T, F>(&self, build: F) -> Arc<T>
273    where
274        T: Any + Send + Sync,
275        F: FnOnce() -> T,
276    {
277        self.with_derived(|_| build())
278    }
279
280    /// Like [`Self::cached_derived`], but on a generation change `build`
281    /// receives the previous generation's value so it can extend it
282    /// incrementally instead of rebuilding from scratch.
283    pub(crate) fn cached_derived_or_update<T, F>(&self, build: F) -> Arc<T>
284    where
285        T: Any + Send + Sync,
286        F: FnOnce(Option<Arc<T>>) -> T,
287    {
288        self.with_derived(build)
289    }
290
291    /// Read-only probe of the current generation's cached `T` — never
292    /// builds. For memory accounting (`IndexStats.derived_cache_bytes`).
293    pub(crate) fn derived_probe<T: Any + Send + Sync>(&self) -> Option<Arc<T>> {
294        let guard = self.derived_cache.lock();
295        let cache = guard.as_ref()?;
296        if cache.generation != self.content_generation {
297            return None;
298        }
299        cache
300            .current
301            .get(&std::any::TypeId::of::<T>())?
302            .clone()
303            .downcast::<T>()
304            .ok()
305    }
306
307    fn with_derived<T, F>(&self, build: F) -> Arc<T>
308    where
309        T: Any + Send + Sync,
310        F: FnOnce(Option<Arc<T>>) -> T,
311    {
312        let key = std::any::TypeId::of::<T>();
313        let mut guard = self.derived_cache.lock();
314        let cache = guard.get_or_insert_with(|| DerivedCache {
315            generation: self.content_generation,
316            current: DerivedMap::default(),
317            prev: DerivedMap::default(),
318        });
319        if cache.generation != self.content_generation {
320            cache.prev = std::mem::take(&mut cache.current);
321            cache.generation = self.content_generation;
322        }
323        if let Some(v) = cache.current.get(&key)
324            && let Ok(t) = v.clone().downcast::<T>()
325        {
326            return t;
327        }
328        let previous = cache.prev.remove(&key).and_then(|v| v.downcast::<T>().ok());
329        let t = Arc::new(build(previous));
330        cache.current.insert(key, t.clone());
331        t
332    }
333
334    /// Per-column memory accounting for the perf panel / `fmf stats`.
335    /// The map size is an estimate (hashbrown control bytes + slot padding).
336    pub fn stats(&self, volume: &str) -> crate::metrics::IndexStats {
337        let n = self.len() as u64;
338        let offsets = (self.name_off.capacity() * 4
339            + self.name_len.capacity() * 2
340            + self.orig_off.capacity() * 4) as u64;
341        // perm_name only — the lazy size/mtime permutations are accounted
342        // with the derived caches (`derived_cache_bytes`).
343        let perms = (self.perm_name.capacity() * 4) as u64;
344        // Field name kept for FFI/JSON compatibility; the structure is the
345        // sorted FRN permutation (index/frn.rs).
346        let frn_map = self.frn_index.bytes();
347        let mut s = crate::metrics::IndexStats {
348            volume: volume.to_string(),
349            entries: n,
350            live_entries: self.live_len() as u64,
351            tombstones: self.tombstones as u64,
352            // Field name kept for FFI/JSON compatibility; this is the
353            // original-spelling overflow pool (fold-identical names live
354            // only in lower_pool).
355            name_pool_bytes: self.orig_pool.capacity() as u64,
356            lower_pool_bytes: self.lower_pool.capacity() as u64,
357            offsets_bytes: offsets,
358            parent_bytes: (self.parent.capacity() * 4) as u64,
359            // Column + the overflow map (hashbrown estimate: (K,V) slot +
360            // 1 control byte per capacity slot; the map is tiny, ADR-0007).
361            size_bytes: (self.size_lo.capacity() * 4
362                + self.size_ovf.capacity() * (std::mem::size_of::<(EntryId, u64)>() + 1))
363                as u64,
364            mtime_bytes: (self.mtime.capacity() * 8) as u64,
365            frn_bytes: (self.frn.capacity() * 8) as u64,
366            flag_bytes: self.flag.capacity() as u64,
367            permutations_bytes: perms,
368            frn_map_bytes: frn_map,
369            dead_name_bytes: self.dead_name_bytes,
370            content_generation: self.content_generation,
371            structural_generation: self.structural_generation,
372            ..Default::default()
373        };
374        // dead_name_bytes already counts every abandoned copy across both
375        // pools (folded always, original when present).
376        let pool_bytes = s.name_pool_bytes + s.lower_pool_bytes;
377        s.pool_garbage_ratio = if pool_bytes > 0 {
378            self.dead_name_bytes as f64 / pool_bytes as f64
379        } else {
380            0.0
381        };
382        s.total_bytes = s.name_pool_bytes
383            + s.lower_pool_bytes
384            + s.offsets_bytes
385            + s.parent_bytes
386            + s.size_bytes
387            + s.mtime_bytes
388            + s.frn_bytes
389            + s.flag_bytes
390            + s.permutations_bytes
391            + s.frn_map_bytes;
392        s.bytes_per_entry = if n > 0 {
393            s.total_bytes as f64 / n as f64
394        } else {
395            0.0
396        };
397        s
398    }
399
400    /// Trim over-allocated columns after a bulk build.
401    pub fn shrink_to_fit(&mut self) {
402        self.frn_index.shrink_to_fit();
403        self.lower_pool.shrink_to_fit();
404        self.orig_pool.shrink_to_fit();
405        self.orig_off.shrink_to_fit();
406        self.name_off.shrink_to_fit();
407        self.name_len.shrink_to_fit();
408        self.parent.shrink_to_fit();
409        self.size_lo.shrink_to_fit();
410        self.size_ovf.shrink_to_fit();
411        self.mtime.shrink_to_fit();
412        self.frn.shrink_to_fit();
413        self.flag.shrink_to_fit();
414        self.perm_name.shrink_to_fit();
415    }
416
417    /// The always-maintained name-sorted permutation: entry ids in default
418    /// (folded-name) sort order, the merge target of every USN batch.
419    pub fn name_permutation(&self) -> &[EntryId] {
420        &self.perm_name
421    }
422
423    /// Append the full WTF-8 path of `id` ("C:\dir\file.txt") to `out`.
424    /// Built lazily from the parent chain — paths are never stored.
425    pub fn append_path(&self, id: EntryId, out: &mut Vec<u8>) {
426        self.append_parent_path(id, out);
427        if id != Self::ROOT {
428            out.extend_from_slice(self.name(id));
429        }
430    }
431
432    /// Append the path of `id`'s parent directory, including a trailing `\`.
433    pub fn append_parent_path(&self, id: EntryId, out: &mut Vec<u8>) {
434        let mut chain = [0u32; 128];
435        let mut depth = 0;
436        let mut cur = if id == Self::ROOT {
437            NO_PARENT
438        } else {
439            self.parent(id)
440        };
441        while cur != NO_PARENT && depth < chain.len() {
442            chain[depth] = cur;
443            depth += 1;
444            cur = if cur == Self::ROOT {
445                NO_PARENT
446            } else {
447                self.parent(cur)
448            };
449        }
450        for &c in chain[..depth].iter().rev() {
451            // The synthetic scope-mode ROOT (ADR-0024) carries no name; skip
452            // it (name + separator) so multi-root paths don't gain a leading
453            // `\`. Real $MFT entries always have a name, so this is inert for
454            // the privileged path (its ROOT is the volume label, e.g. "C:").
455            let name = self.name(c);
456            if !name.is_empty() {
457                out.extend_from_slice(name);
458                out.push(b'\\');
459            }
460        }
461    }
462
463    /// Fraction of slots that are tombstones (0.0–1.0) — the compaction
464    /// trigger input. 0.0 for an empty index.
465    pub fn tombstone_ratio(&self) -> f64 {
466        if self.is_empty() {
467            0.0
468        } else {
469            self.tombstones as f64 / self.len() as f64
470        }
471    }
472
473    /// The one definition of each sort key's strict total order (id
474    /// tie-break) — `pub(crate)` so the lazy permutation caches in the
475    /// query layer sort by exactly the same order the merge maintains.
476    #[inline]
477    pub(crate) fn cmp_by(&self, key: SortKey, a: EntryId, b: EntryId) -> std::cmp::Ordering {
478        self.sort_columns().cmp_by(key, a, b)
479    }
480
481    pub(super) fn sort_columns(&self) -> SortColumns<'_> {
482        SortColumns {
483            lower_pool: &self.lower_pool,
484            name_off: &self.name_off,
485            name_len: &self.name_len,
486            size_lo: &self.size_lo,
487            size_ovf: &self.size_ovf,
488            mtime: &self.mtime,
489        }
490    }
491}
492
493/// Borrowed view of the sort-key columns, so permutation maintenance can
494/// hold `&mut` permutation arrays while comparing through the one
495/// definition of each key's order (a drifting duplicate of `cmp_by` would
496/// silently corrupt the merge).
497pub(super) struct SortColumns<'a> {
498    lower_pool: &'a [u8],
499    name_off: &'a [u32],
500    name_len: &'a [u16],
501    size_lo: &'a [u32],
502    size_ovf: &'a FxHashMap<EntryId, u64>,
503    mtime: &'a [i64],
504}
505
506impl<'a> SortColumns<'a> {
507    pub(super) const fn new(
508        lower_pool: &'a [u8],
509        name_off: &'a [u32],
510        name_len: &'a [u16],
511        size_lo: &'a [u32],
512        size_ovf: &'a FxHashMap<EntryId, u64>,
513        mtime: &'a [i64],
514    ) -> Self {
515        Self {
516            lower_pool,
517            name_off,
518            name_len,
519            size_lo,
520            size_ovf,
521            mtime,
522        }
523    }
524
525    #[inline]
526    fn lower_name(&self, id: EntryId) -> &[u8] {
527        pool_lower_name(self.lower_pool, self.name_off, self.name_len, id)
528    }
529
530    #[inline]
531    fn size_of(&self, id: EntryId) -> u64 {
532        column_size(self.size_lo, self.size_ovf, id)
533    }
534
535    /// Strict total order (id tie-break): no two distinct ids compare equal,
536    /// which is what makes merged permutations byte-deterministic.
537    #[inline]
538    pub(super) fn cmp_by(&self, key: SortKey, a: EntryId, b: EntryId) -> std::cmp::Ordering {
539        match key {
540            SortKey::Name => self.lower_name(a).cmp(self.lower_name(b)).then(a.cmp(&b)),
541            SortKey::Size => self.size_of(a).cmp(&self.size_of(b)).then(a.cmp(&b)),
542            SortKey::Mtime => self.mtime[a as usize]
543                .cmp(&self.mtime[b as usize])
544                .then(a.cmp(&b)),
545        }
546    }
547}
548
549#[cfg(test)]
550mod tests {
551
552    use crate::index::testutil::build_sample;
553
554    #[test]
555    fn full_path_builds_lazily() {
556        let idx = build_sample();
557        let note = idx.entry_by_record(100).unwrap();
558        let mut p = Vec::new();
559        idx.append_path(note, &mut p);
560        assert_eq!(p, b"C:\\docs\\Note.TXT");
561
562        let mut pp = Vec::new();
563        idx.append_parent_path(note, &mut pp);
564        assert_eq!(pp, b"C:\\docs\\");
565    }
566
567    #[test]
568    fn name_permutation_is_sorted() {
569        let idx = build_sample();
570        let by_name: Vec<&[u8]> = idx
571            .name_permutation()
572            .iter()
573            .map(|&id| idx.lower_name(id))
574            .collect();
575        let mut expect = by_name.clone();
576        expect.sort();
577        assert_eq!(by_name, expect);
578    }
579}