1use std::collections::HashSet;
21use std::path::PathBuf;
22use std::time::Instant;
23
24use std::os::windows::ffi::OsStrExt;
25use std::os::windows::fs::MetadataExt;
26
27use crate::index::{Frn, RawEntry, VolumeIndex, VolumeIndexBuilder};
28use crate::wtf8::push_wtf8_pair;
29
30use super::ScanStats;
31use super::walk_id::path_record;
32
33const ATTR_DIRECTORY: u32 = 0x10;
36const ATTR_HIDDEN: u32 = 0x2;
37const ATTR_SYSTEM: u32 = 0x4;
38const ATTR_REPARSE_POINT: u32 = 0x400;
39
40const MAX_DEPTH: u32 = 100;
43
44const SCOPE_ROOT_RECORD: u64 = 0;
48
49struct Pending {
53 path: PathBuf,
54 folded: Vec<u8>,
55 record: u64,
56 depth: u32,
57}
58
59#[must_use]
67pub fn walk_scan(roots: &[String], excludes: &[String]) -> (VolumeIndex, ScanStats) {
68 let t0 = Instant::now();
69 let mut stats = ScanStats {
70 volume: "scope".to_string(),
71 ..Default::default()
72 };
73 let mut b = VolumeIndexBuilder::new("", SCOPE_ROOT_RECORD);
74
75 let mut units: Vec<u16> = Vec::new();
77 let mut name_buf: Vec<u8> = Vec::new();
78 let mut lower_buf: Vec<u8> = Vec::new();
79 let mut stack: Vec<Pending> = Vec::new();
80
81 let prune_set: HashSet<Vec<u8>> = excludes
86 .iter()
87 .map(|e| {
88 units.clear();
89 units.extend(PathBuf::from(e).as_os_str().encode_wide());
90 name_buf.clear();
91 lower_buf.clear();
92 push_wtf8_pair(&units, &mut name_buf, &mut lower_buf);
93 lower_buf.clone()
94 })
95 .collect();
96
97 for root in roots {
98 let path = PathBuf::from(root);
99 let md = match std::fs::metadata(&path) {
100 Ok(m) if m.is_dir() => m,
101 _ => {
102 stats.walk_read_errors += 1;
103 continue;
104 }
105 };
106 units.clear();
107 units.extend(path.as_os_str().encode_wide());
108 name_buf.clear();
109 lower_buf.clear();
110 push_wtf8_pair(&units, &mut name_buf, &mut lower_buf);
111 let record = path_record(&lower_buf);
112 let attrs = md.file_attributes();
113 b.push(RawEntry {
117 parent_frn: Frn(SCOPE_ROOT_RECORD),
118 frn: Frn(record),
119 name_utf16: &units,
120 is_dir: true,
121 is_reparse: attrs & ATTR_REPARSE_POINT != 0,
122 is_hidden: attrs & ATTR_HIDDEN != 0,
123 is_system: attrs & ATTR_SYSTEM != 0,
124 size: md.file_size(),
125 mtime: md.last_write_time() as i64,
126 });
127 stats.dirs += 1;
128 let mut folded = lower_buf.clone();
133 if folded.last() == Some(&b'\\') {
134 folded.pop();
135 }
136 stack.push(Pending {
137 path,
138 folded,
139 record,
140 depth: 0,
141 });
142 }
143
144 while let Some(cur) = stack.pop() {
145 let Ok(rd) = std::fs::read_dir(&cur.path) else {
146 stats.walk_read_errors += 1;
147 continue;
148 };
149 for entry in rd {
150 let Ok(entry) = entry else {
151 stats.walk_read_errors += 1;
152 continue;
153 };
154 let Ok(md) = entry.metadata() else {
157 stats.walk_read_errors += 1;
158 continue;
159 };
160 let attrs = md.file_attributes();
161 let is_dir = attrs & ATTR_DIRECTORY != 0;
162 let is_reparse = attrs & ATTR_REPARSE_POINT != 0;
163
164 let name = entry.file_name();
165 units.clear();
166 units.extend(name.encode_wide());
167 name_buf.clear();
168 lower_buf.clear();
169 push_wtf8_pair(&units, &mut name_buf, &mut lower_buf);
173 let mut folded_child = cur.folded.clone();
174 folded_child.push(b'\\');
175 folded_child.extend_from_slice(&lower_buf);
176 let record = path_record(&folded_child);
177
178 if is_dir && prune_set.contains(&folded_child) {
182 stats.walk_excluded_pruned += 1;
183 continue;
184 }
185
186 b.push(RawEntry {
187 parent_frn: Frn(cur.record),
188 frn: Frn(record),
189 name_utf16: &units,
190 is_dir,
191 is_reparse,
192 is_hidden: attrs & ATTR_HIDDEN != 0,
193 is_system: attrs & ATTR_SYSTEM != 0,
194 size: md.file_size(),
195 mtime: md.last_write_time() as i64,
196 });
197 if is_dir {
198 stats.dirs += 1;
199 } else {
200 stats.files += 1;
201 }
202
203 if is_dir && !is_reparse {
206 if cur.depth + 1 < MAX_DEPTH {
207 stack.push(Pending {
208 path: entry.path(),
209 folded: folded_child,
210 record,
211 depth: cur.depth + 1,
212 });
213 } else {
214 stats.walk_depth_truncated += 1;
215 }
216 }
217 }
218 }
219
220 stats.elapsed_walk_ms = t0.elapsed().as_millis() as u64;
221 stats.walk_dirs = stats.dirs;
222 stats.walk_files = stats.files;
223 let (idx, finish) = b.finish_timed();
224 stats.elapsed_build_ms = finish.build_ms;
225 stats.elapsed_sort_ms = finish.sort_ms;
226 stats.elapsed_total_ms = t0.elapsed().as_millis() as u64;
227 stats.peak_working_set_bytes = crate::mft::peak_working_set();
228 (idx, stats)
229}
230
231#[cfg(test)]
232mod tests {
233 use super::*;
234
235 struct Tree(PathBuf);
237 impl Tree {
238 fn new(tag: &str) -> Self {
239 let dir =
240 std::env::temp_dir().join(format!("fmf-walk-test-{tag}-{}", std::process::id()));
241 let _ = std::fs::remove_dir_all(&dir);
242 std::fs::create_dir_all(&dir).expect("create temp tree root");
243 Self(dir)
244 }
245 }
246 impl Drop for Tree {
247 fn drop(&mut self) {
248 let _ = std::fs::remove_dir_all(&self.0);
249 }
250 }
251
252 fn folded(path: &str) -> Vec<u8> {
253 let units: Vec<u16> = std::path::Path::new(path)
254 .as_os_str()
255 .encode_wide()
256 .collect();
257 let (mut n, mut l) = (Vec::new(), Vec::new());
258 push_wtf8_pair(&units, &mut n, &mut l);
259 l
260 }
261
262 #[test]
263 fn walk_builds_queryable_index_with_correct_paths() {
264 let tree = Tree::new("paths");
265 let root = tree.0.join("Documents");
266 std::fs::create_dir_all(root.join("sub")).unwrap();
267 std::fs::write(root.join("alpha.txt"), b"a").unwrap();
268 std::fs::write(root.join("sub").join("beta.log"), b"bb").unwrap();
269
270 let root_str = root.to_str().unwrap().to_string();
271 let (idx, stats) = walk_scan(std::slice::from_ref(&root_str), &[]);
272
273 assert_eq!(stats.dirs, 2, "dirs");
275 assert_eq!(stats.files, 2, "files");
276 assert_eq!(stats.walk_read_errors, 0, "no read errors");
277
278 let beta_path = format!("{root_str}\\sub\\beta.log");
280 let id = idx
281 .entry_by_record(path_record(&folded(&beta_path)))
282 .expect("beta.log indexed");
283 let mut out = Vec::new();
284 idx.append_path(id, &mut out);
285 assert_eq!(String::from_utf8(out).unwrap(), beta_path);
286
287 let sub_id = idx
289 .entry_by_record(path_record(&folded(&format!("{root_str}\\sub"))))
290 .expect("sub indexed");
291 assert_eq!(idx.parent(id), sub_id);
292 let root_id = idx
293 .entry_by_record(path_record(&folded(&root_str)))
294 .expect("root indexed");
295 assert_eq!(idx.parent(sub_id), root_id);
296 assert!(idx.is_dir(sub_id));
297 assert_eq!(idx.size(id), 2);
298 }
299
300 #[test]
301 fn multiple_roots_share_one_index() {
302 let tree = Tree::new("multiroot");
303 let a = tree.0.join("RootA");
304 let b = tree.0.join("RootB");
305 std::fs::create_dir_all(&a).unwrap();
306 std::fs::create_dir_all(&b).unwrap();
307 std::fs::write(a.join("one.txt"), b"1").unwrap();
308 std::fs::write(b.join("two.txt"), b"2").unwrap();
309
310 let (idx, stats) = walk_scan(
311 &[
312 a.to_str().unwrap().to_string(),
313 b.to_str().unwrap().to_string(),
314 ],
315 &[],
316 );
317 assert_eq!(stats.files, 2, "one file per root");
318
319 for (root, name) in [(&a, "one.txt"), (&b, "two.txt")] {
320 let p = format!("{}\\{name}", root.to_str().unwrap());
321 let id = idx
322 .entry_by_record(path_record(&folded(&p)))
323 .unwrap_or_else(|| panic!("{p} indexed"));
324 let mut out = Vec::new();
325 idx.append_path(id, &mut out);
326 assert_eq!(String::from_utf8(out).unwrap(), p);
327 }
328 }
329
330 #[test]
331 fn missing_root_is_counted_not_fatal() {
332 let (idx, stats) = walk_scan(&["Z:\\does\\not\\exist-fmf".to_string()], &[]);
333 assert_eq!(stats.walk_read_errors, 1);
334 assert_eq!(stats.files, 0);
336 assert_eq!(stats.dirs, 0);
337 assert_eq!(idx.live_len(), 1, "just the empty synthetic root");
338 }
339
340 #[test]
341 fn excluded_subtree_is_pruned() {
342 let tree = Tree::new("exclude");
343 let root = tree.0.join("Projects");
344 std::fs::create_dir_all(root.join("app").join("node_modules")).unwrap();
345 std::fs::create_dir_all(root.join("archive")).unwrap();
346 std::fs::write(root.join("app").join("main.rs"), b"m").unwrap();
347 std::fs::write(root.join("app").join("node_modules").join("dep.js"), b"d").unwrap();
348 std::fs::write(root.join("archive").join("old.txt"), b"o").unwrap();
349
350 let root_str = root.to_str().unwrap().to_string();
351 let nm = format!("{root_str}\\app\\node_modules");
352 let archive = format!("{root_str}\\archive");
353 let (idx, stats) = walk_scan(
354 std::slice::from_ref(&root_str),
355 &[nm.clone(), archive.clone()],
356 );
357
358 assert_eq!(stats.walk_excluded_pruned, 2, "two subtrees pruned");
360 let present = |p: &str| idx.entry_by_record(path_record(&folded(p))).is_some();
363 assert!(
364 present(&format!("{root_str}\\app\\main.rs")),
365 "sibling kept"
366 );
367 assert!(!present(&format!("{nm}\\dep.js")), "excluded file dropped");
368 assert!(
369 !present(&format!("{archive}\\old.txt")),
370 "excluded file dropped"
371 );
372 assert!(!present(&nm), "excluded dir itself dropped");
373 assert!(!present(&archive), "excluded dir itself dropped");
374 }
375
376 #[test]
377 fn root_with_trailing_separator_resolves_and_prunes_canonically() {
378 let tree = Tree::new("trailingsep");
379 let base = tree.0.join("Data");
380 std::fs::create_dir_all(base.join("keep")).unwrap();
381 std::fs::create_dir_all(base.join("skip")).unwrap();
382 std::fs::write(base.join("keep").join("a.txt"), b"a").unwrap();
383 std::fs::write(base.join("skip").join("b.txt"), b"b").unwrap();
384
385 let base_str = base.to_str().unwrap().to_string();
386 let root_with_sep = format!("{base_str}\\");
390 let skip = format!("{base_str}\\skip");
391 let (idx, stats) = walk_scan(
392 std::slice::from_ref(&root_with_sep),
393 std::slice::from_ref(&skip),
394 );
395
396 assert_eq!(
397 stats.walk_excluded_pruned, 1,
398 "trailing-sep root still prunes"
399 );
400 let present = |p: &str| idx.entry_by_record(path_record(&folded(p))).is_some();
401 assert!(
402 present(&format!("{base_str}\\keep\\a.txt")),
403 "kept file resolves"
404 );
405 assert!(!present(&format!("{skip}\\b.txt")), "excluded file dropped");
406 }
407}