Skip to main content

fmf_core\index/
mutate.rs

1use crate::wtf8;
2
3use super::core::SortColumns;
4use super::{
5    EncodedEntry, EntryId, Frn, NO_PARENT, RawEntry, RecordNo, SortKey, VolumeIndex, flags,
6    merge_sorted_tail,
7};
8
9impl VolumeIndex {
10    // ── Incremental mutation (USN batches; see module docs) ──────────────
11
12    /// Pool bytes `id` owns that compaction could reclaim: the folded copy
13    /// always, plus the original copy when one exists.
14    fn owned_name_bytes(&self, id: EntryId) -> u64 {
15        let len = self.name_len[id as usize] as u64;
16        if self.orig_off[id as usize] == u32::MAX {
17            len
18        } else {
19            len * 2
20        }
21    }
22
23    /// Insert or replace an entry for `record`. Replacement (same record
24    /// number) tombstones the old entry — this is how renames work.
25    /// Returns the new id. Caller must finish the batch with
26    /// [`Self::merge_new_into_permutations`].
27    pub fn upsert(&mut self, e: &RawEntry) -> EntryId {
28        if let Some(old) = self.entry_by_record(e.frn.record()) {
29            self.flag[old as usize] |= flags::TOMBSTONE;
30            self.tombstones += 1;
31            self.dead_name_bytes += self.owned_name_bytes(old);
32        }
33        // Parents are already live on the USN path; unknown ones attach to
34        // root (orphan records do occur in real MFTs).
35        let parent = self
36            .entry_by_record(e.parent_frn.record())
37            .unwrap_or(Self::ROOT);
38        self.push_raw(e, parent)
39    }
40
41    /// Tombstoning is the whole deletion: the FRN index never finds dead
42    /// entries (liveness filter), so there is nothing to unmap.
43    pub fn delete(&mut self, record: impl Into<RecordNo>) -> Option<EntryId> {
44        let id = self.entry_by_record(record)?;
45        self.flag[id as usize] |= flags::TOMBSTONE;
46        self.tombstones += 1;
47        self.dead_name_bytes += self.owned_name_bytes(id);
48        Some(id)
49    }
50
51    /// Move `record` under a new parent. Cheap: no permutation depends on
52    /// the path, and child paths rebuild lazily. A corrupt record naming
53    /// itself as parent keeps its current parent (no self-cycles).
54    pub fn reparent(
55        &mut self,
56        record: impl Into<RecordNo>,
57        new_parent_record: impl Into<RecordNo>,
58    ) -> Option<EntryId> {
59        let id = self.entry_by_record(record)?;
60        let parent = self
61            .entry_by_record(new_parent_record)
62            .unwrap_or(Self::ROOT);
63        if parent != id {
64            self.parent[id as usize] = parent;
65        }
66        self.recompute_excluded(id);
67        if self.is_dir(id) {
68            self.dir_topology_generation += 1; // descendant paths moved
69        }
70        Some(id)
71    }
72
73    /// Rename/move a *directory* in place. Directories must keep their
74    /// `EntryId` stable — children's `parent` fields point at it — so instead
75    /// of tombstone+new (the file path), the name is swapped and the entry is
76    /// repositioned inside `perm_name`. O(len) per rename; directory renames
77    /// are rare enough that this beats invalidating every child.
78    pub fn rename_dir_in_place(
79        &mut self,
80        record: impl Into<RecordNo>,
81        name_utf16: &[u16],
82        new_parent_record: impl Into<RecordNo>,
83    ) -> Option<EntryId> {
84        let id = self.entry_by_record(record)?;
85        let pos = self.perm_name.iter().position(|&x| x == id)?;
86        self.perm_name.remove(pos);
87
88        // The old name bytes are abandoned where they live.
89        self.dead_name_bytes += self.owned_name_bytes(id);
90        let off = self.lower_pool.len();
91        let mut orig = Vec::with_capacity(name_utf16.len() * 3);
92        wtf8::push_wtf8_pair(name_utf16, &mut orig, &mut self.lower_pool);
93        self.name_off[id as usize] = off as u32;
94        self.name_len[id as usize] = (self.lower_pool.len() - off) as u16;
95        self.orig_off[id as usize] = self.push_orig_if_differs(off, &orig);
96        let parent = self
97            .entry_by_record(new_parent_record)
98            .unwrap_or(Self::ROOT);
99        if parent != id {
100            self.parent[id as usize] = parent;
101        }
102        self.recompute_excluded(id);
103
104        let ins = self
105            .perm_name
106            .binary_search_by(|&x| self.cmp_by(SortKey::Name, x, id))
107            .unwrap_or_else(|e| e);
108        self.perm_name.insert(ins, id);
109        self.dir_topology_generation += 1; // descendant paths renamed
110        Some(id)
111    }
112
113    /// Update size/mtime in place (`USN_REASON_DATA`_* without a name change).
114    pub fn update_stat(
115        &mut self,
116        record: impl Into<RecordNo>,
117        size: u64,
118        mtime: i64,
119    ) -> Option<EntryId> {
120        let id = self.entry_by_record(record)?;
121        self.set_size(id, size);
122        self.mtime[id as usize] = mtime;
123        Some(id)
124    }
125
126    /// Merge entries `first_new..len` (already appended, unsorted) into the
127    /// name permutation (in place — see `merge_sorted_tail`), then bump
128    /// the content generation. Call once per USN batch. The lazy size/mtime
129    /// permutation caches catch up on their next sorted query (the
130    /// generation bump is their invalidation signal).
131    pub fn merge_new_into_permutations(&mut self, first_new: EntryId) {
132        // The FRN index rides the same batch boundary (its own watermark).
133        {
134            let Self {
135                frn_index,
136                frn,
137                flag,
138                ..
139            } = self;
140            frn_index.merge_appended(frn, flag);
141        }
142        let mut batch: Vec<EntryId> = (first_new..self.len() as u32).collect();
143        if !batch.is_empty() {
144            // Split the borrow: the `&mut` permutation alongside the shared
145            // key columns, comparing through the same SortColumns order
146            // that built it.
147            let Self { perm_name, .. } = self;
148            let cols = SortColumns::new(
149                &self.lower_pool,
150                &self.name_off,
151                &self.name_len,
152                &self.size_lo,
153                &self.size_ovf,
154                &self.mtime,
155            );
156            batch.sort_unstable_by(|&a, &b| cols.cmp_by(SortKey::Name, a, b));
157            merge_sorted_tail(perm_name, &batch, |a, b| cols.cmp_by(SortKey::Name, a, b));
158        }
159        self.content_generation += 1;
160    }
161
162    /// Store the original spelling only when it differs from the folded
163    /// bytes just appended at `lower_off` — the fold-identical majority
164    /// costs nothing beyond the sentinel (ADR-0004).
165    fn push_orig_if_differs(&mut self, lower_off: usize, orig: &[u8]) -> u32 {
166        if orig == &self.lower_pool[lower_off..] {
167            u32::MAX
168        } else {
169            // < MAX, not <=: u32::MAX is the fold-identical sentinel.
170            assert!(
171                self.orig_pool.len() + orig.len() < u32::MAX as usize,
172                "orig pool overflow"
173            );
174            let off = self.orig_pool.len() as u32;
175            self.orig_pool.extend_from_slice(orig);
176            off
177        }
178    }
179
180    /// Append with a pre-resolved parent: the USN path resolves against the
181    /// live index (see [`Self::upsert`]); the initial-scan builder passes a
182    /// provisional ROOT because `finish()` re-resolves every parent anyway —
183    /// a per-push lookup against the unmerged FRN tail would be O(n²) there.
184    pub(super) fn push_raw(&mut self, e: &RawEntry, parent: EntryId) -> EntryId {
185        assert!(
186            self.lower_pool.len() + e.name_utf16.len() * 4 < u32::MAX as usize,
187            "name pool overflow"
188        );
189        let off = self.lower_pool.len();
190        let mut orig = Vec::with_capacity(e.name_utf16.len() * 3);
191        wtf8::push_wtf8_pair(e.name_utf16, &mut orig, &mut self.lower_pool);
192        let orig_off = self.push_orig_if_differs(off, &orig);
193        self.push_columns(
194            off,
195            orig_off,
196            parent,
197            e.frn,
198            e.size,
199            e.mtime,
200            e.is_dir,
201            e.is_reparse,
202            e.is_hidden,
203            e.is_system,
204        )
205    }
206
207    pub(super) fn push_encoded(&mut self, e: &EncodedEntry, parent: EntryId) -> EntryId {
208        debug_assert_eq!(e.name_wtf8.len(), e.lower_wtf8.len());
209        assert!(
210            self.lower_pool.len() + e.lower_wtf8.len() < u32::MAX as usize,
211            "name pool overflow"
212        );
213        let off = self.lower_pool.len();
214        self.lower_pool.extend_from_slice(e.lower_wtf8);
215        let orig_off = self.push_orig_if_differs(off, e.name_wtf8);
216        self.push_columns(
217            off,
218            orig_off,
219            parent,
220            e.frn,
221            e.size,
222            e.mtime,
223            e.is_dir,
224            e.is_reparse,
225            e.is_hidden,
226            e.is_system,
227        )
228    }
229
230    /// Shared column append after the name bytes already landed in the pools
231    /// at `off`/`orig_off`. The flag/parent logic must stay identical between
232    /// the utf16 (`push_raw`) and pre-encoded (`push_encoded`) entry points.
233    #[allow(clippy::too_many_arguments)]
234    fn push_columns(
235        &mut self,
236        off: usize,
237        orig_off: u32,
238        parent: EntryId,
239        frn: Frn,
240        size: u64,
241        mtime: i64,
242        is_dir: bool,
243        is_reparse: bool,
244        is_hidden: bool,
245        is_system: bool,
246    ) -> EntryId {
247        assert!(
248            self.len() < u32::MAX as usize - 1,
249            "volume entry count overflow"
250        );
251        let id = self.len() as EntryId;
252        self.name_off.push(off as u32);
253        self.name_len.push((self.lower_pool.len() - off) as u16);
254        self.orig_off.push(orig_off);
255        self.parent.push(parent);
256        self.push_size(size);
257        self.mtime.push(mtime);
258        self.frn.push(frn.0);
259        let mut f = 0u8;
260        if is_dir {
261            f |= flags::IS_DIR;
262        }
263        if is_reparse {
264            f |= flags::REPARSE;
265        }
266        if is_hidden {
267            f |= flags::HIDDEN;
268        }
269        if is_system {
270            f |= flags::SYSTEM;
271        }
272        // Provisional during the initial scan (parents may resolve later —
273        // the builder recomputes in finish()); exact on the USN path where
274        // parents are already live.
275        let parent_excluded = self
276            .flag
277            .get(parent as usize)
278            .is_some_and(|pf| pf & flags::EXCLUDED != 0);
279        if is_hidden || is_system || parent_excluded {
280            f |= flags::EXCLUDED;
281        }
282        self.flag.push(f);
283        id
284    }
285
286    /// Re-derive EXCLUDED for `id` from its own H/S bits and current parent.
287    pub(super) fn recompute_excluded(&mut self, id: EntryId) {
288        let p = self.parent[id as usize];
289        let inherited = p != NO_PARENT && p != id && self.flag[p as usize] & flags::EXCLUDED != 0;
290        let own = self.flag[id as usize] & (flags::HIDDEN | flags::SYSTEM) != 0;
291        if own || inherited {
292            self.flag[id as usize] |= flags::EXCLUDED;
293        } else {
294            self.flag[id as usize] &= !flags::EXCLUDED;
295        }
296    }
297
298    /// Update raw attribute bits (USN `BASIC_INFO_CHANGE`) and the derived
299    /// EXCLUDED bit.
300    pub fn update_attrs(
301        &mut self,
302        record: impl Into<RecordNo>,
303        is_hidden: bool,
304        is_system: bool,
305    ) -> Option<EntryId> {
306        let id = self.entry_by_record(record)?;
307        let f = &mut self.flag[id as usize];
308        *f = (*f & !(flags::HIDDEN | flags::SYSTEM))
309            | if is_hidden { flags::HIDDEN } else { 0 }
310            | if is_system { flags::SYSTEM } else { 0 };
311        self.recompute_excluded(id);
312        Some(id)
313    }
314}
315
316#[cfg(test)]
317mod tests {
318    use super::*;
319    use crate::index::VolumeIndexBuilder;
320    use crate::index::testutil::{build_sample, raw, raw_attr, u16s};
321
322    #[test]
323    fn rename_is_tombstone_plus_new_entry() {
324        let mut idx = build_sample();
325        let old = idx.entry_by_record(100).unwrap();
326        let first_new = idx.len() as u32;
327        let renamed = u16s("renamed.txt");
328        let mut e = raw(100, 50, &renamed, false, 10, 300);
329        e.frn = idx.frn(old); // same FRN, new name
330        let new_id = idx.upsert(&e);
331        idx.merge_new_into_permutations(first_new);
332
333        assert!(!idx.is_live(old));
334        assert!(idx.is_live(new_id));
335        assert_eq!(idx.entry_by_record(100), Some(new_id));
336        assert_eq!(idx.name(new_id), b"renamed.txt");
337        // The name permutation contains the new id in sorted position.
338        let pos = idx
339            .name_permutation()
340            .iter()
341            .position(|&i| i == new_id)
342            .unwrap();
343        let perm = idx.name_permutation();
344        if pos > 0 {
345            assert!(idx.lower_name(perm[pos - 1]) <= idx.lower_name(new_id));
346        }
347        if pos + 1 < perm.len() {
348            assert!(idx.lower_name(new_id) <= idx.lower_name(perm[pos + 1]));
349        }
350    }
351
352    #[test]
353    fn delete_and_reparent() {
354        let mut idx = build_sample();
355        let big = idx.entry_by_record(60).unwrap();
356        idx.reparent(60, 50);
357        let docs = idx.entry_by_record(50).unwrap();
358        assert_eq!(idx.parent(big), docs);
359
360        idx.delete(60);
361        assert!(!idx.is_live(big));
362        assert_eq!(idx.entry_by_record(60), None);
363        assert!(idx.tombstone_ratio() > 0.0);
364    }
365
366    #[test]
367    fn usn_insert_and_moves_track_exclusion() {
368        let sysdir = u16s("sysdir");
369        let normal = u16s("docs");
370        let mut b = VolumeIndexBuilder::new("C:", 5);
371        b.push(raw_attr(10, 5, &sysdir, true, false, true));
372        b.push(raw_attr(20, 5, &normal, true, false, false));
373        let mut idx = b.finish();
374
375        // New plain file created under the system dir → inherits.
376        let name = u16s("payload.tmp");
377        let first_new = idx.len() as u32;
378        let id = idx.upsert(&raw_attr(30, 10, &name, false, false, false));
379        idx.merge_new_into_permutations(first_new);
380        assert!(idx.is_excluded(id));
381
382        // Moved out into a normal dir → bit clears.
383        idx.reparent(30, 20);
384        assert!(!idx.is_excluded(id));
385
386        // Attribute change marks it hidden → re-excluded.
387        idx.update_attrs(30, true, false);
388        assert!(idx.is_excluded(id));
389    }
390
391    /// `perm_name` must stay a sorted permutation of every entry id.
392    fn assert_perm_name_sorted(idx: &VolumeIndex) {
393        let perm = idx.name_permutation();
394        assert_eq!(perm.len(), idx.len(), "perm_name must cover every entry");
395        let mut seen: Vec<EntryId> = perm.to_vec();
396        seen.sort_unstable();
397        assert_eq!(seen, (0..idx.len() as u32).collect::<Vec<_>>());
398        for w in perm.windows(2) {
399            assert!(
400                idx.cmp_by(SortKey::Name, w[0], w[1]).is_lt(),
401                "perm_name out of order at {w:?}"
402            );
403        }
404    }
405
406    #[test]
407    fn rename_dir_in_place_keeps_permutation_sorted() {
408        let mut b = VolumeIndexBuilder::new("C:", 5);
409        let (alpha, mike, zulu, child) = (u16s("alpha"), u16s("mike"), u16s("zulu"), u16s("a.txt"));
410        b.push(raw(10, 5, &alpha, true, 0, 1));
411        b.push(raw(20, 5, &mike, true, 0, 2));
412        b.push(raw(30, 5, &zulu, true, 0, 3));
413        b.push(raw(11, 10, &child, false, 1, 4));
414        let mut idx = b.finish();
415        let dir = idx.entry_by_record(10).unwrap();
416
417        // Move toward the end of the name order, then to the front.
418        let zz = u16s("zz_renamed");
419        assert_eq!(idx.rename_dir_in_place(10, &zz, 5), Some(dir));
420        assert_eq!(idx.name(dir), b"zz_renamed");
421        assert_perm_name_sorted(&idx);
422        let first = u16s("0_first");
423        assert_eq!(idx.rename_dir_in_place(10, &first, 5), Some(dir));
424        assert_perm_name_sorted(&idx);
425
426        // In place: same EntryId, no tombstone, children follow lazily.
427        assert_eq!(idx.entry_by_record(10), Some(dir));
428        assert_eq!(idx.len(), 5);
429        assert_eq!(idx.live_len(), 5);
430        let c = idx.entry_by_record(11).unwrap();
431        let mut p = Vec::new();
432        idx.append_path(c, &mut p);
433        assert_eq!(p, b"C:\\0_first\\a.txt");
434        // Name renames never touch sizes/mtimes, so the lazy size/mtime
435        // orders (query::memo) stay valid without any signal from here.
436    }
437
438    #[test]
439    fn mutations_on_unknown_records_are_safe_noops() {
440        let mut idx = build_sample();
441        let generation = idx.content_generation();
442        let perm_before = idx.name_permutation().to_vec();
443        let ghost = u16s("ghost");
444        assert_eq!(idx.rename_dir_in_place(9999, &ghost, 5), None);
445        assert_eq!(idx.delete(9999), None);
446        assert_eq!(idx.update_stat(9999, 1, 1), None);
447        assert_eq!(idx.update_attrs(9999, true, true), None);
448        assert_eq!(idx.reparent(9999, 5), None);
449        assert_eq!(idx.len(), 4);
450        assert_eq!(idx.live_len(), 4);
451        assert_eq!(idx.name_permutation(), perm_before.as_slice());
452        assert_eq!(idx.content_generation(), generation);
453    }
454
455    #[test]
456    fn rename_dir_with_itself_as_parent_keeps_current_parent() {
457        let mut b = VolumeIndexBuilder::new("C:", 5);
458        let (top, sub) = (u16s("top"), u16s("sub"));
459        b.push(raw(10, 5, &top, true, 0, 1));
460        b.push(raw(20, 10, &sub, true, 0, 2));
461        let mut idx = b.finish();
462        let top_id = idx.entry_by_record(10).unwrap();
463        let sub_id = idx.entry_by_record(20).unwrap();
464
465        // new_parent_record == own record: the parent write is guarded, no
466        // self-cycle is created and the path chain still terminates.
467        let renamed = u16s("renamed");
468        assert_eq!(idx.rename_dir_in_place(20, &renamed, 20), Some(sub_id));
469        assert_eq!(idx.parent(sub_id), top_id);
470        let mut p = Vec::new();
471        idx.append_path(sub_id, &mut p);
472        assert_eq!(p, b"C:\\top\\renamed");
473        assert_perm_name_sorted(&idx);
474
475        // Unknown new parent attaches to the root (current pinned behavior,
476        // same as push_raw's orphan handling).
477        let renamed2 = u16s("renamed2");
478        assert_eq!(
479            idx.rename_dir_in_place(20, &renamed2, 424_242),
480            Some(sub_id)
481        );
482        assert_eq!(idx.parent(sub_id), VolumeIndex::ROOT);
483    }
484
485    #[test]
486    fn reparent_to_self_keeps_current_parent() {
487        // A corrupt USN record whose parent FRN equals its own FRN must not
488        // create a self-cycle (same guard as rename_dir_in_place).
489        let mut idx = build_sample();
490        let docs = idx.entry_by_record(50).unwrap();
491        let before = idx.parent(docs);
492        assert_eq!(idx.reparent(50, 50), Some(docs));
493        assert_eq!(idx.parent(docs), before);
494    }
495
496    #[test]
497    fn update_attrs_recomputes_excluded_from_own_and_inherited_bits() {
498        let mut b = VolumeIndexBuilder::new("C:", 5);
499        let (sysdir, plain, f, g) = (u16s("sysdir"), u16s("plain"), u16s("f.txt"), u16s("g.txt"));
500        b.push(raw_attr(10, 5, &sysdir, true, false, true));
501        b.push(raw_attr(20, 5, &plain, true, false, false));
502        b.push(raw_attr(30, 20, &f, false, false, false));
503        b.push(raw_attr(40, 20, &g, false, false, false));
504        let mut idx = b.finish();
505        let f_id = idx.entry_by_record(30).unwrap();
506        let g_id = idx.entry_by_record(40).unwrap();
507        assert!(!idx.is_excluded(f_id));
508
509        // Own hidden bit set → excluded; cleared again → plain.
510        idx.update_attrs(30, true, false).unwrap();
511        assert!(idx.is_excluded(f_id));
512        idx.update_attrs(30, false, false).unwrap();
513        assert!(!idx.is_excluded(f_id));
514
515        // Under an excluded parent, clearing own bits keeps the inherited bit.
516        idx.reparent(30, 10).unwrap();
517        assert!(idx.is_excluded(f_id));
518        idx.update_attrs(30, false, false).unwrap();
519        assert!(idx.is_excluded(f_id));
520
521        // Marking a dir hidden excludes the dir itself; existing children
522        // keep their stale bit until the next rescan (pinned current
523        // behavior — same accepted-limitation class as moves, see flags doc).
524        let plain_id = idx.entry_by_record(20).unwrap();
525        idx.update_attrs(20, true, false).unwrap();
526        assert!(idx.is_excluded(plain_id));
527        assert!(!idx.is_excluded(g_id));
528
529        // New entries created under it inherit immediately.
530        let h = u16s("h.txt");
531        let first_new = idx.len() as u32;
532        let h_id = idx.upsert(&raw_attr(50, 20, &h, false, false, false));
533        idx.merge_new_into_permutations(first_new);
534        assert!(idx.is_excluded(h_id));
535    }
536
537    struct Rng(u64);
538    impl Rng {
539        fn next(&mut self) -> u64 {
540            let mut x = self.0;
541            x ^= x << 13;
542            x ^= x >> 7;
543            x ^= x << 17;
544            self.0 = x;
545            x
546        }
547    }
548
549    /// Random create/rename/delete/stat batches through the in-place merge:
550    /// every permutation stays a complete permutation, `perm_name` stays
551    /// strictly sorted (names are never mutated in place), and every record
552    /// resolves per a side model — both mid-batch (tail scan) and after the
553    /// merge (sorted lookup).
554    #[test]
555    fn random_batches_keep_permutations_canonical_and_lookups_model_true() {
556        use std::collections::HashMap;
557        let mut rng = Rng(0x5EED_CAFE_D00D);
558        let mut idx = build_sample();
559        // record → the live entry's expected name (None = deleted).
560        let mut model: HashMap<u64, Option<Vec<u8>>> = HashMap::new();
561        for record in [50u64, 60, 100] {
562            let id = idx.entry_by_record(record).unwrap();
563            model.insert(record, Some(idx.name(id).to_vec()));
564        }
565        let check = |idx: &VolumeIndex, record: u64, expect: &Option<Vec<u8>>| match (
566            idx.entry_by_record(record),
567            expect,
568        ) {
569            (Some(id), Some(name)) => assert_eq!(idx.name(id), &name[..]),
570            (None, None) => {}
571            (got, want) => panic!("record {record}: got {got:?}, want live={}", want.is_some()),
572        };
573
574        for _ in 0..100 {
575            let first_new = idx.len() as u32;
576            for _ in 0..=(rng.next() % 8) {
577                let record = 100 + rng.next() % 30;
578                match rng.next() % 4 {
579                    0 | 1 => {
580                        let name = format!("n{}_{}.txt", record, rng.next() % 100);
581                        let units = u16s(&name);
582                        idx.upsert(&raw(
583                            record,
584                            50,
585                            &units,
586                            false,
587                            rng.next() % 1000,
588                            (rng.next() % 1000) as i64,
589                        ));
590                        model.insert(record, Some(name.into_bytes()));
591                    }
592                    2 => {
593                        idx.delete(record);
594                        model.insert(record, None);
595                    }
596                    _ => {
597                        // In-place stat update: never repositions an entry
598                        // (pinned behavior); names unaffected. Mix sizes on
599                        // both sides of the u32 overflow sentinel.
600                        let size = if rng.next().is_multiple_of(8) {
601                            (4u64 << 30) + rng.next() % 1000
602                        } else {
603                            rng.next() % 5000
604                        };
605                        idx.update_stat(record, size, (rng.next() % 5000) as i64);
606                    }
607                }
608                if let Some(expect) = model.get(&record) {
609                    check(&idx, record, expect); // unmerged-tail resolution
610                }
611            }
612            idx.merge_new_into_permutations(first_new);
613
614            // Permutation property: every id exactly once, strictly sorted
615            // (names are never mutated in place). The lazy size/mtime
616            // orders are covered by query::memo's SortPerm oracle.
617            let mut seen: Vec<EntryId> = idx.name_permutation().to_vec();
618            seen.sort_unstable();
619            assert_eq!(seen, (0..idx.len() as u32).collect::<Vec<_>>());
620            assert_perm_name_sorted(&idx);
621            for (record, expect) in &model {
622                check(&idx, *record, expect);
623            }
624        }
625    }
626
627    /// `name()` must return the exact WTF-8 input bytes through every write
628    /// path and a snapshot roundtrip — the fold-overflow layout (originals
629    /// stored only where they differ) must be invisible to readers.
630    #[test]
631    fn names_roundtrip_byte_exact_through_fold_overflow_layout() {
632        let cases: &[&str] = &[
633            "lowercase.txt",
634            "File.TXT",
635            "ALLCAPS",
636            "日本語ファイル.txt",
637            "ΣΟΦΟΣ.doc",
638            "İstanbul.log",
639            "Mixed日本語Name.TXT",
640            "𠮷野家🦀.txt",
641        ];
642        let mut b = VolumeIndexBuilder::new("C:", 5);
643        for (i, name) in cases.iter().enumerate() {
644            let units = u16s(name);
645            b.push(raw(100 + i as u64, 5, &units, false, 1, 1));
646        }
647        let mut idx = b.finish();
648        // Lone surrogate through the USN write path.
649        let first_new = idx.len() as u32;
650        idx.upsert(&raw(900, 5, &[0x0041, 0xD800, 0x0042], false, 1, 1));
651        idx.merge_new_into_permutations(first_new);
652
653        let check = |idx: &VolumeIndex| {
654            for (i, name) in cases.iter().enumerate() {
655                let id = idx.entry_by_record(100 + i as u64).unwrap();
656                assert_eq!(idx.name(id), name.as_bytes(), "{name}");
657                assert_eq!(idx.name(id).len(), idx.lower_name(id).len(), "{name}");
658            }
659            let id = idx.entry_by_record(900).unwrap();
660            let mut units = Vec::new();
661            crate::wtf8::wtf8_to_utf16(idx.name(id), &mut units);
662            assert_eq!(units, vec![0x0041, 0xD800, 0x0042]);
663        };
664        check(&idx);
665
666        let mut buf = Vec::new();
667        idx.write_snapshot(&mut buf, 1, 1).unwrap();
668        let (loaded, _, _) = VolumeIndex::read_snapshot(&mut buf.as_slice()).unwrap();
669        check(&loaded);
670    }
671
672    /// In-place dir renames cross the fold-identity boundary in both
673    /// directions: gaining an original copy and dropping back to the
674    /// shared folded bytes.
675    #[test]
676    fn dir_rename_crosses_fold_identity_both_ways() {
677        let mut b = VolumeIndexBuilder::new("C:", 5);
678        let plain = u16s("plain");
679        b.push(raw(10, 5, &plain, true, 0, 1));
680        let mut idx = b.finish();
681        let id = idx.entry_by_record(10).unwrap();
682        assert_eq!(idx.name(id), b"plain");
683        assert_eq!(idx.lower_name(id), b"plain");
684
685        let upper = u16s("Upper");
686        idx.rename_dir_in_place(10, &upper, 5).unwrap();
687        idx.merge_new_into_permutations(idx.len() as u32);
688        assert_eq!(idx.name(id), b"Upper");
689        assert_eq!(idx.lower_name(id), b"upper");
690
691        let back = u16s("back_to_lower");
692        idx.rename_dir_in_place(10, &back, 5).unwrap();
693        idx.merge_new_into_permutations(idx.len() as u32);
694        assert_eq!(idx.name(id), b"back_to_lower");
695        assert_eq!(idx.lower_name(id), b"back_to_lower");
696        assert_perm_name_sorted(&idx);
697    }
698
699    /// Sizes round-trip across the u32 column + overflow map in both
700    /// directions (grow past the sentinel, shrink back under it).
701    #[test]
702    fn size_overflow_roundtrips_through_updates() {
703        let mut idx = build_sample();
704        let first_new = idx.len() as u32;
705        let name = u16s("huge.vhdx");
706        let id = idx.upsert(&raw(900, 50, &name, false, (6u64 << 30) + 7, 1));
707        idx.merge_new_into_permutations(first_new);
708        assert_eq!(idx.size(id), (6u64 << 30) + 7);
709
710        // Shrink under the sentinel: the overflow slot must be reclaimed.
711        idx.update_stat(900, 1234, 2).unwrap();
712        assert_eq!(idx.size(id), 1234);
713
714        // Grow back over it; exactly u32::MAX must overflow too (sentinel).
715        idx.update_stat(900, u32::MAX as u64, 3).unwrap();
716        assert_eq!(idx.size(id), u32::MAX as u64);
717        idx.update_stat(900, u64::MAX, 4).unwrap();
718        assert_eq!(idx.size(id), u64::MAX);
719    }
720
721    /// `dead_name_bytes` follows every pool-garbage source — the folded copy
722    /// always, the original copy when one existed ("Note.TXT" does, the
723    /// all-lowercase names don't); snapshot restore recomputes the
724    /// tombstone share (rename gaps are a lost lower bound).
725    #[test]
726    fn dead_name_bytes_tracks_pool_garbage() {
727        let owned = |idx: &VolumeIndex, record: u64| {
728            let id = idx.entry_by_record(record).unwrap();
729            let len = idx.name(id).len() as u64;
730            if idx.name(id) == idx.lower_name(id) {
731                len
732            } else {
733                len * 2
734            }
735        };
736        let mut idx = build_sample();
737        assert_eq!(idx.stats("C:").dead_name_bytes, 0);
738
739        let note = owned(&idx, 100); // "Note.TXT": folded + orig copy
740        assert_eq!(note, 16);
741        let first_new = idx.len() as u32;
742        let renamed = u16s("renamed.txt");
743        idx.upsert(&raw(100, 50, &renamed, false, 1, 1));
744        idx.merge_new_into_permutations(first_new);
745        assert_eq!(idx.stats("C:").dead_name_bytes, note);
746
747        let big = owned(&idx, 60); // "big.bin": folded copy only
748        assert_eq!(big, 7);
749        idx.delete(60);
750        assert_eq!(idx.stats("C:").dead_name_bytes, note + big);
751
752        let docs = owned(&idx, 50);
753        let dir2 = u16s("docs2");
754        idx.rename_dir_in_place(50, &dir2, 5);
755        let s = idx.stats("C:");
756        assert_eq!(s.dead_name_bytes, note + big + docs);
757        assert!(s.pool_garbage_ratio > 0.0);
758
759        let mut buf = Vec::new();
760        idx.write_snapshot(&mut buf, 1, 1).unwrap();
761        let (loaded, _, _) = VolumeIndex::read_snapshot(&mut buf.as_slice()).unwrap();
762        assert_eq!(loaded.stats("C:").dead_name_bytes, note + big);
763    }
764
765    #[test]
766    fn no_op_batches_keep_content_generation_monotonic() {
767        let mut idx = build_sample();
768        let g0 = idx.content_generation();
769        let s0 = idx.structural_generation();
770        let perm_before = idx.name_permutation().to_vec();
771
772        // Empty batch (e.g. a dir-rename-only USN batch): generation still
773        // moves so derived caches invalidate, permutations stay put.
774        idx.merge_new_into_permutations(idx.len() as u32);
775        assert_eq!(idx.content_generation(), g0 + 1);
776        assert_eq!(idx.name_permutation(), perm_before.as_slice());
777
778        // Tombstone-only batch: ids stay in the permutations (flag-only).
779        idx.delete(60);
780        idx.merge_new_into_permutations(idx.len() as u32);
781        assert_eq!(idx.content_generation(), g0 + 2);
782        assert_eq!(idx.name_permutation(), perm_before.as_slice());
783
784        // Individual mutations between batches never bump on their own.
785        idx.update_stat(100, 1, 1).unwrap();
786        idx.update_attrs(100, true, false).unwrap();
787        assert_eq!(idx.content_generation(), g0 + 2);
788        // Content batches never touch the structural generation.
789        assert_eq!(idx.structural_generation(), s0);
790    }
791}