1use rayon::prelude::*;
10
11use super::QueryOptions;
12use super::compile::{CTerm, CompiledQuery, Driver};
13use super::matchers::{EvalCtx, terms_match, terms_match_iter};
14use super::memo::{DirPathsLower, DirPathsOrig, MtimePerm, OffsetTable, PathMemos, SizePerm};
15use super::sweep::driver_candidates;
16use crate::index::{EntryId, SortKey, VolumeIndex};
17
18fn path_memos(idx: &VolumeIndex, q: &CompiledQuery) -> PathMemos {
22 PathMemos {
23 lower: q.needs_folded_paths.then(|| {
24 idx.cached_derived_or_update(|prev| match prev {
25 Some(p) => DirPathsLower::extend_from(idx, p),
26 None => DirPathsLower::build(idx),
27 })
28 }),
29 orig: q.needs_orig_paths.then(|| {
30 idx.cached_derived_or_update(|prev| match prev {
31 Some(p) => DirPathsOrig::extend_from(idx, p),
32 None => DirPathsOrig::build(idx),
33 })
34 }),
35 }
36}
37
38const CHUNK: usize = 1 << 16;
40
41pub struct SearchResult {
44 pub ids: Vec<EntryId>,
46 pub content_generation: u64,
48 pub structural_generation: u64,
50}
51
52#[derive(Debug, Default, Clone)]
54pub struct SearchMetrics {
55 pub driver: String,
57 pub memo_us: u64,
59 pub scan_us: u64,
61 pub materialize_us: u64,
63 pub entries_scanned: u64,
65 pub excluded_skipped: u64,
67}
68
69pub fn search(
79 idx: &VolumeIndex,
80 q: &CompiledQuery,
81 opt: &QueryOptions,
82) -> (SearchResult, SearchMetrics) {
83 let mut metrics = SearchMetrics {
84 driver: q.driver_label(),
85 ..Default::default()
86 };
87 let mut stage = crate::metrics::Stage::start();
88 let skip_excluded = !opt.include_hidden_system;
89
90 if q.groups.len() == 1
93 && matches!(q.groups[0].driver, Driver::MatchAll)
94 && q.groups[0].terms.is_empty()
95 {
96 metrics.driver = "perm-walk".to_string();
97 metrics.memo_us = stage.lap();
98 let ids = materialize_filtered(idx, opt, |id| {
99 idx.is_live(id) && !(skip_excluded && idx.is_excluded(id))
100 });
101 metrics.entries_scanned = idx.len() as u64;
102 metrics.materialize_us = stage.lap();
103 return (
104 SearchResult {
105 ids,
106 content_generation: idx.content_generation(),
107 structural_generation: idx.structural_generation(),
108 },
109 metrics,
110 );
111 }
112
113 let memo = path_memos(idx, q);
115 let needs_table = q
116 .groups
117 .iter()
118 .any(|g| !matches!(g.driver, Driver::FullScan | Driver::MatchAll));
119 let table: Option<std::sync::Arc<OffsetTable>> = needs_table.then(|| {
120 idx.cached_derived_or_update(|prev| match prev {
121 Some(p) => OffsetTable::extend_from(idx, p),
122 None => OffsetTable::build(idx),
123 })
124 });
125 metrics.memo_us = stage.lap();
126
127 let n = idx.len();
128 let mut bitmap: Vec<u64> = vec![0u64; n.div_ceil(64)];
129
130 for group in &q.groups {
131 match &group.driver {
132 Driver::MatchAll | Driver::FullScan => {
136 full_scan_group(
137 idx,
138 &memo,
139 &group.terms,
140 skip_excluded,
141 &mut bitmap,
142 &mut metrics,
143 );
144 }
145 driver => {
146 let table = table.as_ref().expect("offset table built");
147 let candidates = driver_candidates(idx, table, driver, skip_excluded);
148 metrics.entries_scanned += candidates.len() as u64;
149 if group.terms.is_empty() && group.driver_exact {
152 for id in candidates {
153 bitmap[id as usize / 64] |= 1u64 << (id as usize % 64);
154 }
155 } else {
156 let passed: Vec<Vec<EntryId>> = candidates
157 .par_chunks(2048)
158 .map(|chunk| {
159 let mut ctx = EvalCtx::default();
160 chunk
161 .iter()
162 .copied()
163 .filter(|&id| {
164 terms_match_iter(
165 idx,
166 &memo,
167 &mut ctx,
168 group.residual_terms(),
169 id,
170 )
171 })
172 .collect()
173 })
174 .collect();
175 for ids in passed {
176 for id in ids {
177 bitmap[id as usize / 64] |= 1u64 << (id as usize % 64);
178 }
179 }
180 }
181 }
182 }
183 }
184 metrics.scan_us = stage.lap();
185
186 let ids = materialize_filtered(idx, opt, |id| {
187 bitmap[id as usize / 64] >> (id as usize % 64) & 1 == 1
188 });
189 metrics.materialize_us = stage.lap();
190
191 (
192 SearchResult {
193 ids,
194 content_generation: idx.content_generation(),
195 structural_generation: idx.structural_generation(),
196 },
197 metrics,
198 )
199}
200
201pub fn refine(
208 idx: &VolumeIndex,
209 q: &CompiledQuery,
210 opt: &QueryOptions,
211 prev_ids: &[EntryId],
212) -> (SearchResult, SearchMetrics) {
213 const REFINE_CHUNK: usize = 4096;
214 let mut metrics = SearchMetrics {
215 driver: q.driver_label(),
216 ..Default::default()
217 };
218 let mut stage = crate::metrics::Stage::start();
219 let skip_excluded = !opt.include_hidden_system;
220
221 let memo = path_memos(idx, q);
222 metrics.memo_us = stage.lap();
223
224 let chunks: Vec<Vec<EntryId>> = prev_ids
225 .par_chunks(REFINE_CHUNK)
226 .map(|chunk| {
227 let mut ctx = EvalCtx::default();
228 chunk
229 .iter()
230 .copied()
231 .filter(|&id| {
232 idx.is_live(id)
233 && !(skip_excluded && idx.is_excluded(id))
234 && q.groups
235 .iter()
236 .any(|g| terms_match_iter(idx, &memo, &mut ctx, g.all_terms(), id))
237 })
238 .collect()
239 })
240 .collect();
241 metrics.entries_scanned = prev_ids.len() as u64;
242 metrics.scan_us = stage.lap();
243
244 let mut ids = Vec::with_capacity(chunks.iter().map(Vec::len).sum());
245 for c in &chunks {
246 ids.extend_from_slice(c);
247 }
248 metrics.materialize_us = stage.lap();
249
250 (
251 SearchResult {
252 ids,
253 content_generation: idx.content_generation(),
254 structural_generation: idx.structural_generation(),
255 },
256 metrics,
257 )
258}
259
260fn full_scan_group(
261 idx: &VolumeIndex,
262 memo: &PathMemos,
263 terms: &[CTerm],
264 skip_excluded: bool,
265 bitmap: &mut [u64],
266 metrics: &mut SearchMetrics,
267) {
268 let n = idx.len();
269 let chunk_results: Vec<(Vec<u64>, u64, u64)> = (0..n.div_ceil(CHUNK))
270 .into_par_iter()
271 .map(|ci| {
272 let start = ci * CHUNK;
273 let end = (start + CHUNK).min(n);
274 let mut words = vec![0u64; (end - start).div_ceil(64)];
275 let mut ctx = EvalCtx::default();
276 let mut scanned = 0u64;
277 let mut skipped = 0u64;
278 for id in start..end {
279 let id = id as EntryId;
280 if !idx.is_live(id) {
281 continue;
282 }
283 if skip_excluded && idx.is_excluded(id) {
284 skipped += 1;
285 continue;
286 }
287 scanned += 1;
288 if terms_match(idx, memo, &mut ctx, terms, id) {
289 let rel = id as usize - start;
290 words[rel / 64] |= 1u64 << (rel % 64);
291 }
292 }
293 (words, scanned, skipped)
294 })
295 .collect();
296
297 let mut word_base = 0usize;
298 for (words, scanned, skipped) in &chunk_results {
299 for (i, w) in words.iter().enumerate() {
300 bitmap[word_base + i] |= w;
301 }
302 word_base += CHUNK / 64;
303 metrics.entries_scanned += scanned;
304 metrics.excluded_skipped += skipped;
305 }
306}
307
308fn materialize_filtered(
314 idx: &VolumeIndex,
315 opt: &QueryOptions,
316 keep: impl Fn(EntryId) -> bool + Sync,
317) -> Vec<EntryId> {
318 const MAT_CHUNK: usize = 1 << 14;
321
322 let size_perm;
323 let mtime_perm;
324 let perm: &[EntryId] = match opt.sort {
325 SortKey::Name => idx.name_permutation(),
326 SortKey::Size => {
327 size_perm = SizePerm::get(idx);
328 &size_perm.0.ids
329 }
330 SortKey::Mtime => {
331 mtime_perm = MtimePerm::get(idx);
332 &mtime_perm.0.ids
333 }
334 };
335 let chunks: Vec<Vec<EntryId>> = perm
336 .par_chunks(MAT_CHUNK)
337 .map(|chunk| chunk.iter().copied().filter(|&id| keep(id)).collect())
338 .collect();
339 let total = chunks.iter().map(Vec::len).sum();
340 let mut ids = Vec::with_capacity(total);
341 if opt.desc {
342 for c in chunks.iter().rev() {
343 ids.extend(c.iter().rev());
344 }
345 } else {
346 for c in &chunks {
347 ids.extend_from_slice(c);
348 }
349 }
350 ids
351}
352
353#[cfg(test)]
354mod tests {
355 use super::super::dates::UtcResolver;
356 use super::super::{CaseMode, QueryOptions, compile, parse};
357 use super::*;
358 use crate::index::{Frn, RawEntry, SortKey, VolumeIndexBuilder};
359
360 fn run(idx: &VolumeIndex, query: &str, opt: QueryOptions) -> Vec<String> {
361 let ast = parse(query).unwrap();
362 let q = compile(&ast, opt.case, &UtcResolver).unwrap();
363 search(idx, &q, &opt)
364 .0
365 .ids
366 .iter()
367 .map(|&id| String::from_utf8_lossy(idx.name(id)).into_owned())
368 .collect()
369 }
370
371 fn names(idx: &VolumeIndex, query: &str) -> Vec<String> {
372 run(idx, query, QueryOptions::default())
373 }
374
375 fn sample() -> VolumeIndex {
377 let day = 864_000_000_000i64; let entries: &[(&str, u64, u64, bool, u64, i64)] = &[
379 ("Docs", 10, 5, true, 0, 100 * day),
381 ("Report.PDF", 11, 10, false, 2 << 20, 19_000 * day),
382 ("notes.txt", 12, 10, false, 512, 19_100 * day),
383 ("src", 20, 5, true, 0, 100 * day),
384 ("main.rs", 21, 20, false, 4096, 19_200 * day),
385 ("big.BIN", 30, 5, false, 3 << 30, 19_300 * day),
386 ];
387 let mut b = VolumeIndexBuilder::new("C:", 5);
388 for (name, rec, parent, is_dir, size, mtime) in entries {
389 let units: Vec<u16> = name.encode_utf16().collect();
390 b.push(RawEntry {
391 parent_frn: Frn(*parent),
392 frn: Frn((1 << 48) | rec),
393 name_utf16: &units,
394 is_dir: *is_dir,
395 is_reparse: false,
396 is_hidden: false,
397 is_system: false,
398 size: *size,
399 mtime: *mtime,
400 });
401 }
402 b.finish()
403 }
404
405 #[test]
406 fn substring_smart_case() {
407 let idx = sample();
408 assert_eq!(names(&idx, "report"), vec!["Report.PDF"]);
409 assert_eq!(names(&idx, "Report"), vec!["Report.PDF"]);
410 assert!(names(&idx, "REPORT.pdf").is_empty());
411 let opt = QueryOptions {
412 case: CaseMode::Insensitive,
413 ..Default::default()
414 };
415 assert_eq!(run(&idx, "REPORT", opt), vec!["Report.PDF"]);
416 }
417
418 #[test]
419 fn and_or_not() {
420 let idx = sample();
421 assert_eq!(names(&idx, "main rs"), vec!["main.rs"]);
422 let mut both = names(&idx, "pdf | txt");
423 both.sort();
424 assert_eq!(both, vec!["Report.PDF", "notes.txt"]);
425 assert_eq!(names(&idx, ".txt !notes"), Vec::<String>::new());
426 }
427
428 #[test]
429 fn ext_filter_case_insensitive_and_files_only() {
430 let idx = sample();
431 assert_eq!(names(&idx, "ext:pdf"), vec!["Report.PDF"]);
432 let mut multi = names(&idx, "ext:pdf;rs");
433 multi.sort();
434 assert_eq!(multi, vec!["Report.PDF", "main.rs"]);
435 }
436
437 #[test]
438 fn size_filter_excludes_dirs() {
439 let idx = sample();
440 assert_eq!(names(&idx, "size:>1gb"), vec!["big.BIN"]);
441 assert_eq!(names(&idx, "size:512"), vec!["notes.txt"]);
442 assert!(names(&idx, "size:0 folder:").is_empty());
443 }
444
445 #[test]
446 fn folder_and_file_filters() {
447 let idx = sample();
448 let mut dirs = names(&idx, "folder:");
449 dirs.sort();
450 assert_eq!(dirs, vec!["C:", "Docs", "src"]);
451 assert_eq!(names(&idx, "folder:doc"), vec!["Docs"]);
452 }
453
454 #[test]
455 fn path_terms() {
456 let idx = sample();
457 let mut in_docs = names(&idx, r"docs\");
458 in_docs.sort();
459 assert_eq!(in_docs, vec!["Report.PDF", "notes.txt"]);
461 assert_eq!(names(&idx, r"path:src main"), vec!["main.rs"]);
462 }
463
464 #[test]
465 fn wildcards_anchor_to_whole_name() {
466 let idx = sample();
467 assert_eq!(names(&idx, "*.rs"), vec!["main.rs"]);
468 assert_eq!(names(&idx, "main.?s"), vec!["main.rs"]);
469 assert!(names(&idx, "*.r").is_empty());
470 assert_eq!(names(&idx, "main*"), vec!["main.rs"]);
472 assert_eq!(names(&idx, "*ain*"), vec!["main.rs"]);
473 assert!(names(&idx, "ain*").is_empty());
474 }
475
476 #[test]
477 fn regex_term() {
478 let idx = sample();
479 assert_eq!(names(&idx, "regex:^ma.n\\.rs$"), vec!["main.rs"]);
480 }
481
482 #[test]
488 fn regex_matches_naive_oracle() {
489 use regex::bytes::RegexBuilder;
490
491 struct Rng(u64);
492 impl Rng {
493 fn next(&mut self) -> u64 {
494 let mut x = self.0;
495 x ^= x << 13;
496 x ^= x >> 7;
497 x ^= x << 17;
498 self.0 = x;
499 x
500 }
501 }
502 let frags = [
503 "report",
504 "Report",
505 "main",
506 "src",
507 "日本",
508 "語",
509 "data",
510 "DLL",
511 "exe",
512 "img",
513 "2024",
514 "v2",
515 "ファイル",
516 "tmp",
517 ];
518 let exts = [".rs", ".txt", ".PDF", ".dll", ".log", ".日", ""];
519 let mut rng = Rng(0x9E37_79B9);
520 let mut b = VolumeIndexBuilder::new("C:", 5);
521 let mut made: Vec<String> = vec!["C:".to_string()];
524 for i in 0..300u64 {
525 let mut name = String::new();
526 for _ in 0..=(rng.next() % 3) {
527 name.push_str(frags[rng.next() as usize % frags.len()]);
528 }
529 name.push_str(exts[rng.next() as usize % exts.len()]);
530 if name.is_empty() {
531 name.push('x');
532 }
533 let units: Vec<u16> = name.encode_utf16().collect();
534 b.push(RawEntry {
535 parent_frn: Frn(5),
536 frn: Frn((1 << 48) | (100 + i)),
537 name_utf16: &units,
538 is_dir: false,
539 is_reparse: false,
540 is_hidden: false,
541 is_system: false,
542 size: i,
543 mtime: i as i64,
544 });
545 made.push(name);
546 }
547 let idx = b.finish();
548
549 let patterns = [
552 "^report",
553 "report",
554 "ort",
555 "main.*rs$",
556 "日.*語",
557 "[0-9]+",
558 "v[0-9]",
559 r".*\.rs$",
560 r"\.dll$",
561 "dll|exe",
562 "^$",
563 "Report",
564 "REPORT",
565 "src.*main",
566 r"^\d",
567 ".*",
568 "ファイル",
569 "(report|main)",
570 r"tmp.*\.log$",
571 "日本",
572 ];
573 let cases = [CaseMode::Smart, CaseMode::Insensitive, CaseMode::Sensitive];
574 for pat in patterns {
575 for case in cases {
576 let opt = QueryOptions {
577 case,
578 ..Default::default()
579 };
580 let ci = match case {
581 CaseMode::Insensitive => true,
582 CaseMode::Sensitive => false,
583 CaseMode::Smart => !crate::wtf8::has_uppercase(pat),
584 };
585 let re = RegexBuilder::new(pat)
586 .case_insensitive(ci)
587 .dot_matches_new_line(true)
588 .build()
589 .unwrap();
590 let mut expect: Vec<String> = made
591 .iter()
592 .filter(|n| re.is_match(n.as_bytes()))
593 .cloned()
594 .collect();
595 expect.sort();
596 let mut got = run(&idx, &format!(r#"regex:"{pat}""#), opt);
598 got.sort();
599 assert_eq!(
600 got, expect,
601 "regex `{pat}` (case {case:?}) diverged from the naive oracle"
602 );
603 }
604 }
605 }
606
607 #[test]
608 fn date_filter_utc() {
609 let idx = sample();
610 let r = names(&idx, "dm:>=1650 ext:pdf");
611 assert_eq!(r, vec!["Report.PDF"]);
612 }
613
614 #[test]
617 fn whole_regex_mode_name_and_path_scope() {
618 use super::super::{RegexScope, compile_whole_regex};
619
620 let idx = sample();
621 let run_re = |pat: &str, scope: RegexScope| {
622 let q = compile_whole_regex(pat, CaseMode::Smart, scope).unwrap();
623 let mut got: Vec<String> = search(&idx, &q, &QueryOptions::default())
624 .0
625 .ids
626 .iter()
627 .map(|&id| String::from_utf8_lossy(idx.name(id)).into_owned())
628 .collect();
629 got.sort();
630 got
631 };
632
633 assert_eq!(run_re(r"\.rs$", RegexScope::Name), vec!["main.rs"]);
636 assert_eq!(run_re("^report", RegexScope::Name), vec!["Report.PDF"]); assert_eq!(
638 run_re("pdf|txt", RegexScope::Name),
639 vec!["Report.PDF", "notes.txt"]
640 );
641 assert!(run_re(r"[0-9]", RegexScope::Name).is_empty()); assert_eq!(
646 run_re(r"docs.*\.pdf$", RegexScope::Path),
647 vec!["Report.PDF"]
648 );
649
650 assert!(compile_whole_regex("(", CaseMode::Smart, RegexScope::Name).is_err());
652 assert!(compile_whole_regex("(a{500}){500}", CaseMode::Smart, RegexScope::Name).is_err());
653 }
654
655 #[test]
656 fn empty_query_matches_all_live_entries() {
657 let idx = sample();
658 assert_eq!(names(&idx, "").len(), idx.live_len());
659 }
660
661 #[test]
662 fn sort_orders() {
663 let idx = sample();
664 let by_size = run(
665 &idx,
666 "file:",
667 QueryOptions {
668 sort: SortKey::Size,
669 ..Default::default()
670 },
671 );
672 assert_eq!(
673 by_size,
674 vec!["notes.txt", "main.rs", "Report.PDF", "big.BIN"]
675 );
676 let by_size_desc = run(
677 &idx,
678 "file:",
679 QueryOptions {
680 sort: SortKey::Size,
681 desc: true,
682 ..Default::default()
683 },
684 );
685 assert_eq!(
686 by_size_desc,
687 vec!["big.BIN", "Report.PDF", "main.rs", "notes.txt"]
688 );
689 }
690
691 #[test]
692 fn tombstones_are_excluded() {
693 let mut idx = sample();
694 idx.delete(11); assert!(names(&idx, "report").is_empty());
696 }
697
698 #[test]
699 fn hidden_system_excluded_by_default_and_toggleable() {
700 let mut b = VolumeIndexBuilder::new("C:", 5);
701 let mk = |name: &str| name.encode_utf16().collect::<Vec<u16>>();
702 let (bin, ghost, vis) = (mk("$Recycle.Bin"), mk("ghost.txt"), mk("visible.txt"));
703 let mut push = |rec: u64, parent: u64, name: &[u16], is_dir, is_system| {
704 b.push(RawEntry {
705 parent_frn: Frn(parent),
706 frn: Frn((1 << 48) | rec),
707 name_utf16: name,
708 is_dir,
709 is_reparse: false,
710 is_hidden: false,
711 is_system,
712 size: 1,
713 mtime: 1,
714 });
715 };
716 push(10, 5, &bin, true, true);
717 push(11, 10, &ghost, false, false);
718 push(20, 5, &vis, false, false);
719 let idx = b.finish();
720
721 assert_eq!(names(&idx, "txt"), vec!["visible.txt"]);
722 assert_eq!(names(&idx, "").len(), 2); let all = run(
725 &idx,
726 "txt",
727 QueryOptions {
728 include_hidden_system: true,
729 ..Default::default()
730 },
731 );
732 let mut all = all;
733 all.sort();
734 assert_eq!(all, vec!["ghost.txt", "visible.txt"]);
735 }
736
737 #[test]
741 fn pool_scan_matches_naive_oracle() {
742 use std::fmt::Write as _;
743
744 struct Rng(u64);
745 impl Rng {
746 fn next(&mut self) -> u64 {
747 let mut x = self.0;
748 x ^= x << 13;
749 x ^= x >> 7;
750 x ^= x << 17;
751 self.0 = x;
752 x
753 }
754 }
755 let fragments = [
756 "ab",
757 "abc",
758 "日本",
759 "語",
760 "x",
761 "𠮷",
762 "report",
763 "Report",
764 "ort",
765 "tab",
766 "TAB",
767 "ba",
768 "ファイル",
769 ];
770 let mut rng = Rng(42);
771 let mut b = VolumeIndexBuilder::new("C:", 5);
772 let mut names_made: Vec<String> = Vec::new();
773 for i in 0..500u64 {
774 let mut name = String::new();
775 for _ in 0..=(rng.next() % 4) {
776 name.push_str(fragments[(rng.next() % fragments.len() as u64) as usize]);
777 }
778 write!(name, "_{i}").unwrap();
779 let units: Vec<u16> = name.encode_utf16().collect();
780 b.push(RawEntry {
781 parent_frn: Frn(5),
782 frn: Frn((1 << 48) | (100 + i)),
783 name_utf16: &units,
784 is_dir: false,
785 is_reparse: false,
786 is_hidden: false,
787 is_system: false,
788 size: i,
789 mtime: i as i64,
790 });
791 names_made.push(name);
792 }
793 let mut idx = b.finish();
794 for i in (0..500u64).step_by(7) {
796 let new_name = format!("renamed_{i}_abba");
797 let units: Vec<u16> = new_name.encode_utf16().collect();
798 let first_new = idx.len() as u32;
799 idx.upsert(&RawEntry {
800 parent_frn: Frn(5),
801 frn: Frn((1 << 48) | (100 + i)),
802 name_utf16: &units,
803 is_dir: false,
804 is_reparse: false,
805 is_hidden: false,
806 is_system: false,
807 size: i,
808 mtime: i as i64,
809 });
810 idx.merge_new_into_permutations(first_new);
811 names_made[i as usize] = new_name;
812 }
813
814 let needles = [
815 "ab",
816 "abc",
817 "ba",
818 "abba",
819 "日本",
820 "語x",
821 "𠮷",
822 "report",
823 "ort",
824 "tab",
825 "_3",
826 "renamed",
827 "zzz_nothing",
828 ];
829 for needle in needles {
830 let mut expect: Vec<String> = names_made
831 .iter()
832 .filter(|n| n.to_lowercase().contains(needle))
833 .cloned()
834 .collect();
835 expect.sort();
836 let mut got = names(&idx, needle);
837 got.sort();
838 assert_eq!(got, expect, "needle `{needle}` diverged from oracle");
839 }
840
841 let upper_needles = ["Report", "Rep", "TAB", "Ort"];
845 assert!(
846 names_made.iter().any(|n| n.contains("Report")),
847 "oracle coverage: mixed-case names must exist"
848 );
849 for needle in upper_needles {
850 let mut expect: Vec<String> = names_made
851 .iter()
852 .filter(|n| n.contains(needle))
853 .cloned()
854 .collect();
855 expect.sort();
856 let mut got = names(&idx, needle);
857 got.sort();
858 assert_eq!(got, expect, "smart-case needle `{needle}` diverged");
859 }
860 let sensitive = QueryOptions {
862 case: CaseMode::Sensitive,
863 ..Default::default()
864 };
865 for needle in needles.iter().chain(upper_needles.iter()) {
866 let mut expect: Vec<String> = names_made
867 .iter()
868 .filter(|n| n.contains(needle))
869 .cloned()
870 .collect();
871 expect.sort();
872 let mut got = run(&idx, needle, sensitive);
873 got.sort();
874 assert_eq!(got, expect, "sensitive needle `{needle}` diverged");
875 }
876
877 let sequences: &[&[&str]] = &[
882 &["", "a", "ab", "abb", "abba"],
883 &["r", "re", "rep", "repo", "report"],
884 &["ab", "ab ba", "ab ba _3"],
885 &["日", "日本", "日本 語"],
886 &["t", "ta", "tab", "tab *ab*"],
887 &["re", "renamed", "renamed !zzz"],
888 &["a", "a size:>100", "a size:>100 size:<400"],
889 &["", "x", "x𠮷"],
890 &["or", "Ort"], ];
892 let opts = [
893 QueryOptions::default(),
894 QueryOptions {
895 sort: SortKey::Size,
896 desc: true,
897 ..Default::default()
898 },
899 ];
900 let mut refined_pairs = 0;
901 for opt in opts {
902 for seq in sequences {
903 let mut prev: Option<(super::super::CompiledQuery, Vec<EntryId>)> = None;
904 for text in *seq {
905 let q = compile(&parse(text).unwrap(), opt.case, &UtcResolver).unwrap();
906 let fresh = search(&idx, &q, &opt).0.ids;
907 if let Some((pq, pids)) = &prev
908 && super::super::subsumes(pq, &opt, &q, &opt)
909 {
910 let refined = refine(&idx, &q, &opt, pids).0.ids;
911 assert_eq!(
912 refined, fresh,
913 "refine diverged from fresh search at `{text}` (sort {:?})",
914 opt.sort
915 );
916 refined_pairs += 1;
917 }
918 prev = Some((q, fresh));
919 }
920 }
921 }
922 assert!(
923 refined_pairs >= 20,
924 "subsumption barely fired ({refined_pairs} pairs) — the oracle is vacuous"
925 );
926 }
927}
928
929#[cfg(test)]
930mod proptests {
931 use std::sync::atomic::{AtomicU64, Ordering};
932
933 use proptest::prelude::*;
934
935 use super::super::dates::UtcResolver;
936 use super::super::{
937 CaseMode, CompiledQuery, QueryOptions, RegexScope, compile, compile_whole_regex, parse,
938 subsumes,
939 };
940 use super::{EntryId, refine, search};
941 use crate::index::{Frn, RawEntry, SortKey, VolumeIndex, VolumeIndexBuilder};
942
943 static REFINED_PAIRS: AtomicU64 = AtomicU64::new(0);
949
950 const FRAGMENTS: &[&str] = &[
956 "ab", "abc", "Re", "report", "Report", "ort", "tab", "TAB", "日本", "語", "𠮷", "x",
957 "main", ".rs", ".txt", ".PDF",
958 ];
959
960 #[derive(Debug, Clone)]
964 struct GenEntry {
965 name: String,
966 is_dir: bool,
967 is_hidden: bool,
968 is_system: bool,
969 size: u64,
970 mtime: i64,
971 }
972
973 fn entry_strategy() -> impl Strategy<Value = GenEntry> {
974 (
975 proptest::collection::vec(0usize..FRAGMENTS.len(), 1..=4),
976 any::<bool>(),
977 any::<bool>(),
978 any::<bool>(),
979 0u64..(8u64 << 30),
980 0i64..(20_000i64 * 864_000_000_000),
981 )
982 .prop_map(|(parts, is_dir, is_hidden, is_system, size, mtime)| {
983 let name: String = parts.iter().map(|&i| FRAGMENTS[i]).collect();
984 GenEntry {
985 name,
986 is_dir,
987 is_hidden,
988 is_system,
989 size,
990 mtime,
991 }
992 })
993 }
994
995 fn build_index(entries: &[GenEntry]) -> VolumeIndex {
996 let mut b = VolumeIndexBuilder::new("C:", 5);
997 for (i, e) in entries.iter().enumerate() {
998 let units: Vec<u16> = e.name.encode_utf16().collect();
999 b.push(RawEntry {
1000 parent_frn: Frn(5),
1001 frn: Frn((1 << 48) | (100 + i as u64)),
1002 name_utf16: &units,
1003 is_dir: e.is_dir,
1004 is_reparse: false,
1005 is_hidden: e.is_hidden,
1006 is_system: e.is_system,
1007 size: e.size,
1008 mtime: e.mtime,
1009 });
1010 }
1011 b.finish()
1012 }
1013
1014 #[derive(Debug, Clone)]
1018 enum Step {
1019 Frag(usize),
1021 Term(usize),
1023 Wildcard(usize),
1025 Not(usize),
1027 Size(u64),
1029 IsDir(bool),
1031 Ext(usize),
1033 Truncate,
1036 }
1037
1038 fn step_strategy() -> impl Strategy<Value = Step> {
1039 prop_oneof![
1040 (0usize..FRAGMENTS.len()).prop_map(Step::Frag),
1041 (0usize..FRAGMENTS.len()).prop_map(Step::Term),
1042 (0usize..FRAGMENTS.len()).prop_map(Step::Wildcard),
1043 (0usize..FRAGMENTS.len()).prop_map(Step::Not),
1044 (1u64..(4u64 << 30)).prop_map(Step::Size),
1045 any::<bool>().prop_map(Step::IsDir),
1046 (0usize..3usize).prop_map(Step::Ext),
1047 Just(Step::Truncate),
1048 ]
1049 }
1050
1051 fn texts_from_steps(steps: &[Step]) -> Vec<String> {
1054 use std::fmt::Write as _;
1055 const EXTS: &[&str] = &["rs", "txt", "pdf"];
1056 let mut cur = String::new();
1057 let mut out = vec![cur.clone()];
1058 for step in steps {
1059 match step {
1060 Step::Frag(i) => cur.push_str(FRAGMENTS[*i]),
1061 Step::Term(i) => {
1062 cur.push(' ');
1063 cur.push_str(FRAGMENTS[*i]);
1064 }
1065 Step::Wildcard(i) => {
1066 cur.push_str(" *");
1067 cur.push_str(FRAGMENTS[*i].trim_start_matches('.'));
1068 cur.push('*');
1069 }
1070 Step::Not(i) => {
1071 cur.push_str(" !");
1072 cur.push_str(FRAGMENTS[*i].trim_start_matches('.'));
1073 }
1074 Step::Size(min) => {
1075 write!(cur, " size:>{min}").expect("writing to a String is infallible");
1076 }
1077 Step::IsDir(d) => {
1078 cur.push_str(if *d { " folder:" } else { " file:" });
1079 }
1080 Step::Ext(i) => {
1081 cur.push_str(" ext:");
1082 cur.push_str(EXTS[*i]);
1083 }
1084 Step::Truncate => {
1085 let trimmed = cur.trim_end();
1086 let keep = trimmed.rfind(char::is_whitespace).map_or(0, |p| p);
1087 cur.truncate(keep);
1088 }
1089 }
1090 out.push(cur.clone());
1091 }
1092 out
1093 }
1094
1095 fn compile_text(text: &str, opt: &QueryOptions) -> Option<CompiledQuery> {
1099 if opt.regex_mode {
1100 compile_whole_regex(text, opt.case, opt.regex_scope).ok()
1101 } else {
1102 compile(&parse(text).ok()?, opt.case, &UtcResolver).ok()
1103 }
1104 }
1105
1106 fn options_strategy() -> impl Strategy<Value = QueryOptions> {
1107 (
1108 prop_oneof![
1109 Just(SortKey::Name),
1110 Just(SortKey::Size),
1111 Just(SortKey::Mtime)
1112 ],
1113 any::<bool>(),
1114 prop_oneof![
1115 Just(CaseMode::Smart),
1116 Just(CaseMode::Insensitive),
1117 Just(CaseMode::Sensitive)
1118 ],
1119 any::<bool>(),
1120 any::<bool>(),
1121 prop_oneof![Just(RegexScope::Name), Just(RegexScope::Path)],
1122 )
1123 .prop_map(
1124 |(sort, desc, case, include_hidden_system, regex_mode, regex_scope)| QueryOptions {
1125 sort,
1126 desc,
1127 case,
1128 include_hidden_system,
1129 regex_mode,
1130 regex_scope,
1131 },
1132 )
1133 }
1134
1135 proptest! {
1136 #![proptest_config(ProptestConfig::with_cases(256))]
1146 #[test]
1147 fn refine_equals_fresh_whenever_subsumed(
1148 entries in proptest::collection::vec(entry_strategy(), 1..40),
1149 steps in proptest::collection::vec(step_strategy(), 1..8),
1150 opt in options_strategy(),
1151 ) {
1152 let idx = build_index(&entries);
1153 let texts = texts_from_steps(&steps);
1154
1155 let mut prev: Option<(CompiledQuery, Vec<EntryId>)> = None;
1156 for text in &texts {
1157 let Some(q) = compile_text(text, &opt) else {
1158 prev = None;
1161 continue;
1162 };
1163 let fresh = search(&idx, &q, &opt).0.ids;
1164
1165 if let Some((pq, pids)) = &prev
1166 && subsumes(pq, &opt, &q, &opt)
1167 {
1168 let refined = refine(&idx, &q, &opt, pids).0.ids;
1169 prop_assert_eq!(
1170 &refined,
1171 &fresh,
1172 "refine diverged from fresh search at `{}` (opt {:?})",
1173 text,
1174 opt
1175 );
1176 REFINED_PAIRS.fetch_add(1, Ordering::Relaxed);
1177 }
1178 prev = Some((q, fresh));
1179 }
1180 }
1181 }
1182
1183 #[test]
1191 fn subsumption_fires_at_least_sometimes() {
1192 let entries: Vec<GenEntry> = (0..30u64)
1196 .map(|i| {
1197 let name = format!(
1198 "{}{}{}",
1199 FRAGMENTS[i as usize % FRAGMENTS.len()],
1200 FRAGMENTS[(i as usize + 3) % FRAGMENTS.len()],
1201 if i % 2 == 0 { ".rs" } else { ".txt" },
1202 );
1203 GenEntry {
1204 name,
1205 is_dir: i % 5 == 0,
1206 is_hidden: i % 7 == 0,
1207 is_system: i % 11 == 0,
1208 size: i * 1024,
1209 mtime: i as i64 * 864_000_000_000,
1210 }
1211 })
1212 .collect();
1213 let idx = build_index(&entries);
1214
1215 let mut local_fired = 0u64;
1216 for sort in [SortKey::Name, SortKey::Size, SortKey::Mtime] {
1217 for case in [CaseMode::Smart, CaseMode::Insensitive, CaseMode::Sensitive] {
1218 let opt = QueryOptions {
1219 sort,
1220 case,
1221 ..Default::default()
1222 };
1223 let seq = ["", "r", "re", "rep", "repo", "report", "report .rs"];
1224 let mut prev: Option<(CompiledQuery, Vec<EntryId>)> = None;
1225 for text in seq {
1226 let q = compile_text(text, &opt).expect("static texts compile");
1227 let fresh = search(&idx, &q, &opt).0.ids;
1228 if let Some((pq, pids)) = &prev
1229 && subsumes(pq, &opt, &q, &opt)
1230 {
1231 let refined = refine(&idx, &q, &opt, pids).0.ids;
1232 assert_eq!(
1233 refined, fresh,
1234 "refine diverged from fresh search at `{text}` (opt {opt:?})"
1235 );
1236 local_fired += 1;
1237 }
1238 prev = Some((q, fresh));
1239 }
1240 }
1241 }
1242 assert!(
1243 local_fired >= 9,
1244 "subsumption barely fired ({local_fired}) — the oracle is vacuous"
1245 );
1246 REFINED_PAIRS.fetch_add(local_fired, Ordering::Relaxed);
1249 }
1250}