Skip to main content

fmf_core\scan/
deferred.rs

1//! Deferred $`ATTRIBUTE_LIST` name resolution (ADR-0011): name-bearing
2//! extension records are cached in RAM while the $MFT streams through, so
3//! this pass resolves names without disk reads; anything missing (cache
4//! cap, torn records) falls back to a targeted read of the live volume.
5
6use ntfs_reader::api::{
7    NtfsAttributeListEntry, NtfsAttributeType, NtfsFileName, NtfsFileNamespace,
8};
9use ntfs_reader::file::NtfsFile;
10use rustc_hash::FxHashMap;
11
12use crate::mft::pick_name;
13
14use super::parse::{ParsedBatch, RecordArena};
15use super::volume_io::{RunMap, apply_fixup, open_raw_volume};
16
17/// Upper bound on cached name-bearing extension records (~1KiB each, so
18/// ≤128MiB transient). A real C: has tens of thousands; past the cap the
19/// deferred pass falls back to disk reads for the remainder.
20pub(super) const EXT_NAME_CACHE_CAP: usize = 128 << 10;
21
22/// Random access to single records for the deferred attribute-list pass.
23struct RecordReader<'a> {
24    file: std::fs::File,
25    map: &'a RunMap,
26    record_size: usize,
27    buf: Vec<u8>,
28}
29
30impl RecordReader<'_> {
31    fn read_record(&mut self, number: u64) -> Option<&[u8]> {
32        use std::io::{Read, Seek, SeekFrom};
33        let logical = number * self.record_size as u64;
34        let (phys, contig) = self.map.physical(logical)?;
35        if (contig as usize) < self.record_size {
36            return None;
37        }
38        self.buf.resize(self.record_size, 0);
39        self.file.seek(SeekFrom::Start(phys)).ok()?;
40        self.file.read_exact(&mut self.buf).ok()?;
41        if !NtfsFile::is_valid(&self.buf) || !apply_fixup(&mut self.buf) {
42            return None;
43        }
44        Some(&self.buf)
45    }
46}
47
48/// Disk fallback for extension records missing from the streamed cache —
49/// opened only when actually needed (expected: never on a healthy scan).
50struct LazyRecordReader<'a> {
51    volume_path: &'a str,
52    map: &'a RunMap,
53    record_size: usize,
54    inner: Option<RecordReader<'a>>,
55    failed: bool,
56    /// Failed `read_record` calls — each one is a name that stays
57    /// unresolved until the next rescan. `resolve_deferred` folds this into
58    /// its batch, so the count reaches `ScanStats` (don't go silent).
59    failures: u64,
60}
61
62impl<'a> LazyRecordReader<'a> {
63    const fn new(volume_path: &'a str, map: &'a RunMap, record_size: usize) -> Self {
64        LazyRecordReader {
65            volume_path,
66            map,
67            record_size,
68            inner: None,
69            failed: false,
70            failures: 0,
71        }
72    }
73
74    fn read_record(&mut self, number: u64) -> Option<&[u8]> {
75        if self.inner.is_none() && !self.failed {
76            match open_raw_volume(self.volume_path) {
77                Ok(file) => {
78                    self.inner = Some(RecordReader {
79                        file,
80                        map: self.map,
81                        record_size: self.record_size,
82                        buf: Vec::new(),
83                    });
84                }
85                Err(e) => {
86                    self.failed = true;
87                    tracing::warn!(error = %e, "deferred-pass fallback volume handle unavailable");
88                }
89            }
90        }
91        let Some(inner) = self.inner.as_mut() else {
92            self.failures += 1;
93            return None;
94        };
95        let got = inner.read_record(number);
96        if got.is_none() {
97            self.failures += 1;
98        }
99        got
100    }
101}
102
103/// Resolve the display name of a record whose $`FILE_NAME` lives in extension
104/// records (resident $`ATTRIBUTE_LIST` → referenced records). Targets come
105/// from the streamed extension-record cache; anything missing (cache cap,
106/// torn records) falls back to a targeted disk read. Mirrors ntfs-reader's
107/// `get_best_file_name` without needing the whole MFT in RAM.
108fn resolve_attr_list_name(
109    base: &NtfsFile,
110    ext: &FxHashMap<u64, u32>,
111    arena: &RecordArena,
112    rr: &mut LazyRecordReader,
113) -> Option<NtfsFileName> {
114    let attr = base.get_attribute(NtfsAttributeType::AttributeList)?;
115    if attr.header.is_non_resident != 0 {
116        return None; // rare; counted as skipped
117    }
118    let header = attr.resident_header()?;
119    let data = attr.data();
120    let start = header.value_offset as usize;
121    let end = start.checked_add(header.value_length as usize)?;
122    if end > data.len() {
123        return None;
124    }
125    let list = &data[start..end];
126
127    let mut best: Option<NtfsFileName> = None;
128    let mut off = 0usize;
129    while off + size_of::<NtfsAttributeListEntry>() <= list.len() {
130        // `list` is a &[u8] and an entry sits at an arbitrary byte offset, so the
131        // address is not guaranteed aligned for NtfsAttributeListEntry — read it
132        // out unaligned instead of forming a misaligned reference (UB). The loop
133        // guard above keeps the read within `list`.
134        let entry = unsafe {
135            std::ptr::read_unaligned(list.as_ptr().add(off).cast::<NtfsAttributeListEntry>())
136        };
137        let len = entry.length as usize;
138        if len < size_of::<NtfsAttributeListEntry>() || off + len > list.len() {
139            break;
140        }
141        if entry.type_id == NtfsAttributeType::FileName as u32 {
142            let target = entry.reference();
143            if target != base.number {
144                let picked = match ext.get(&target) {
145                    Some(&slot) => pick_name(&NtfsFile::new(target, arena.get(slot))),
146                    None => rr
147                        .read_record(target)
148                        .and_then(|bytes| pick_name(&NtfsFile::new(target, bytes))),
149                };
150                if let Some(name) = picked {
151                    let ns = name.header.namespace;
152                    if ns == NtfsFileNamespace::Win32 as u8
153                        || ns == NtfsFileNamespace::Win32AndDos as u8
154                    {
155                        return Some(name);
156                    }
157                    if best.is_none() {
158                        best = Some(name);
159                    }
160                }
161            }
162        }
163        off += len.next_multiple_of(8);
164    }
165    best
166}
167
168/// Resolve deferred $`ATTRIBUTE_LIST` names in parallel — almost entirely
169/// from RAM: every target is an extension record and the whole $MFT just
170/// streamed through the pipeline, so `ext` already holds the bytes
171/// (ADR-0011). Chunk order is preserved, so `EntryId` assignment matches a
172/// serial loop.
173pub(super) fn resolve_deferred(
174    volume_path: &str,
175    runmap: &RunMap,
176    record_size: usize,
177    ext: &FxHashMap<u64, u32>,
178    arena: &RecordArena,
179    deferred: &[(u64, u32)],
180) -> Vec<ParsedBatch> {
181    use rayon::prelude::*;
182    const DEFER_CHUNK: usize = 256;
183
184    deferred
185        .par_chunks(DEFER_CHUNK)
186        .map(|chunk| {
187            let mut out = ParsedBatch::default();
188            let mut rr = LazyRecordReader::new(volume_path, runmap, record_size);
189            for &(number, slot) in chunk {
190                let f = NtfsFile::new(number, arena.get(slot));
191                match resolve_attr_list_name(&f, ext, arena, &mut rr) {
192                    Some(name) => out.push_named(&f, &name),
193                    None => out.deferred_unresolved += 1,
194                }
195            }
196            out.deferred_name_read_failures = rr.failures;
197            out
198        })
199        .collect()
200}