Skip to main content

fmf_core\query/
subsume.rs

1//! Query subsumption: does the result set of `next` provably fit inside the
2//! result set of `prev`? When it does (and the index generation is
3//! unchanged), the engine refines the cached `prev` ids instead of scanning
4//! — the incremental-typing fast path.
5//!
6//! Every rule here must be *sound* (false positives lose results — the one
7//! unacceptable failure mode); being incomplete merely costs a cold scan.
8//! The oracle property test in exec.rs cross-checks refine == fresh search
9//! over random names and typing sequences.
10
11use super::QueryOptions;
12use super::compile::{CompiledQuery, Matcher};
13use crate::wtf8;
14
15/// True when every entry matching `next` (under `next_opt`) is guaranteed to
16/// be present in `prev`'s materialized ids, in the right order.
17pub fn subsumes(
18    prev: &CompiledQuery,
19    prev_opt: &QueryOptions,
20    next: &CompiledQuery,
21    next_opt: &QueryOptions,
22) -> bool {
23    // Cached ids are materialized in prev's sort order — refine preserves
24    // subsequence order, so the sort must be identical.
25    if prev_opt.sort != next_opt.sort || prev_opt.desc != next_opt.desc {
26        return false;
27    }
28    // prev must have seen at least as much: hidden/system included before,
29    // narrowed now is fine (refine re-filters); the reverse is not.
30    if next_opt.include_hidden_system && !prev_opt.include_hidden_system {
31        return false;
32    }
33    // CaseMode never needs comparing: its effect is baked into each compiled
34    // matcher's `folded` flag, which the per-term rules check.
35
36    // The empty query (single empty group) covers every live entry.
37    let prev_is_match_all = prev.groups.len() == 1 && prev.groups[0].all_terms().next().is_none();
38    if prev_is_match_all {
39        return true;
40    }
41    // v1 keeps OR out of the implication algebra: one AND group each.
42    let [prev_group] = prev.groups.as_slice() else {
43        return false;
44    };
45    let [next_group] = next.groups.as_slice() else {
46        return false;
47    };
48
49    // Every prev condition must be implied by some next condition; next's
50    // extra terms only narrow an AND group further.
51    prev_group.all_terms().all(|p| {
52        next_group.all_terms().any(|n| {
53            if p.negated != n.negated {
54                return false;
55            }
56            if p.negated {
57                // ¬A ⇒ ¬B only holds for B ⇒ A; equality is the safe core.
58                matcher_eq(&p.matcher, &n.matcher)
59            } else {
60                implies(&n.matcher, &p.matcher)
61            }
62        }) || (!p.negated && matches!(p.matcher, Matcher::True))
63    })
64}
65
66/// Needle-domain bridge: a byte needle proven about one pool implies one
67/// about the other only in the orig→folded direction. Valid-UTF-8 needles
68/// can only match at code-point boundaries and folding is length-preserving
69/// per code point, so a name matching `n` at offset i guarantees the lower
70/// pool holds `fold(n)` at i. The reverse (folded match → original bytes)
71/// does not hold.
72fn bridge_needle(
73    n_bytes: &[u8],
74    n_folded: bool,
75    p_folded: bool,
76) -> Option<std::borrow::Cow<'_, [u8]>> {
77    match (n_folded, p_folded) {
78        (true, true) | (false, false) => Some(std::borrow::Cow::Borrowed(n_bytes)),
79        (false, true) => std::str::from_utf8(n_bytes)
80            .ok()
81            .map(|s| std::borrow::Cow::Owned(wtf8::fold_str(s).into_bytes())),
82        (true, false) => None,
83    }
84}
85
86/// Does a positive match of `n` guarantee a positive match of `p`?
87fn implies(n: &Matcher, p: &Matcher) -> bool {
88    use Matcher::{Ext, IsDir, Mtime, NamePrefix, NameSub, NameSuffix, Size, True};
89    match (n, p) {
90        // Anything implies the always-true matcher.
91        (_, True) => true,
92
93        // Name literals: containment in the right pool domain.
94        (
95            NameSub {
96                finder: nf,
97                folded: nfo,
98            },
99            NameSub {
100                finder: pf,
101                folded: pfo,
102            },
103        ) => bridge_needle(nf.needle(), *nfo, *pfo)
104            .is_some_and(|n| memchr::memmem::find(&n, pf.needle()).is_some()),
105        // A prefix/suffix match still means "the name contains these bytes".
106        (
107            NamePrefix { bytes, folded: nfo } | NameSuffix { bytes, folded: nfo },
108            NameSub {
109                finder: pf,
110                folded: pfo,
111            },
112        ) => bridge_needle(bytes, *nfo, *pfo)
113            .is_some_and(|n| memchr::memmem::find(&n, pf.needle()).is_some()),
114        (
115            NamePrefix {
116                bytes: nb,
117                folded: nfo,
118            },
119            NamePrefix {
120                bytes: pb,
121                folded: pfo,
122            },
123        ) => bridge_needle(nb, *nfo, *pfo).is_some_and(|n| n.starts_with(pb)),
124        (
125            NameSuffix {
126                bytes: nb,
127                folded: nfo,
128            },
129            NameSuffix {
130                bytes: pb,
131                folded: pfo,
132            },
133        ) => bridge_needle(nb, *nfo, *pfo).is_some_and(|n| n.ends_with(pb)),
134
135        // Set/range narrowing. Ext semantics are *equality* on the extension,
136        // so subset is the only sound relation ("ext:dl" never implies
137        // "ext:d" — and never reaches here because {dl} ⊄ {d}).
138        (Ext { exts: ne }, Ext { exts: pe }) => ne.iter().all(|e| pe.contains(e)),
139        (
140            Size {
141                min: nmin,
142                max: nmax,
143            },
144            Size {
145                min: pmin,
146                max: pmax,
147            },
148        ) => nmin >= pmin && nmax <= pmax,
149        (
150            Mtime {
151                min: nmin,
152                max: nmax,
153            },
154            Mtime {
155                min: pmin,
156                max: pmax,
157            },
158        ) => nmin >= pmin && nmax <= pmax,
159        (IsDir(a), IsDir(b)) => a == b,
160
161        // Path / regex terms: only exact equality is provable cheaply.
162        _ => matcher_eq(n, p),
163    }
164}
165
166/// Structural equality of two compiled matchers.
167fn matcher_eq(a: &Matcher, b: &Matcher) -> bool {
168    use Matcher::{
169        Ext, IsDir, Mtime, NamePrefix, NameRegex, NameSub, NameSuffix, PathRegex, PathSub, Size,
170        True,
171    };
172    match (a, b) {
173        (True, True) => true,
174        (
175            NameSub {
176                finder: fa,
177                folded: ca,
178            },
179            NameSub {
180                finder: fb,
181                folded: cb,
182            },
183        )
184        | (
185            PathSub {
186                finder: fa,
187                folded: ca,
188            },
189            PathSub {
190                finder: fb,
191                folded: cb,
192            },
193        ) => ca == cb && fa.needle() == fb.needle(),
194        (
195            NamePrefix {
196                bytes: ba,
197                folded: ca,
198            },
199            NamePrefix {
200                bytes: bb,
201                folded: cb,
202            },
203        )
204        | (
205            NameSuffix {
206                bytes: ba,
207                folded: ca,
208            },
209            NameSuffix {
210                bytes: bb,
211                folded: cb,
212            },
213        ) => ca == cb && ba == bb,
214        (NameRegex { re: ra }, NameRegex { re: rb })
215        | (PathRegex { re: ra }, PathRegex { re: rb }) => ra.as_str() == rb.as_str(),
216        (Ext { exts: ea }, Ext { exts: eb }) => ea == eb,
217        (Size { min: ia, max: xa }, Size { min: ib, max: xb }) => ia == ib && xa == xb,
218        (Mtime { min: ia, max: xa }, Mtime { min: ib, max: xb }) => ia == ib && xa == xb,
219        (IsDir(a), IsDir(b)) => a == b,
220        _ => false,
221    }
222}
223
224#[cfg(test)]
225mod tests {
226    use super::super::dates::UtcResolver;
227    use super::super::{CaseMode, compile, parse};
228    use super::*;
229
230    fn q(text: &str) -> CompiledQuery {
231        compile(&parse(text).unwrap(), CaseMode::Smart, &UtcResolver).unwrap()
232    }
233
234    fn subsumed(prev: &str, next: &str) -> bool {
235        let o = QueryOptions::default();
236        subsumes(&q(prev), &o, &q(next), &o)
237    }
238
239    #[test]
240    fn typing_extends_substring() {
241        assert!(subsumed("", "w"));
242        assert!(subsumed("w", "wi"));
243        assert!(subsumed("wi", "win"));
244        assert!(subsumed("win", "win .rs"));
245        assert!(!subsumed("win", "wi"), "backspace must go cold");
246        assert!(!subsumed("win", "wood"));
247    }
248
249    #[test]
250    fn smart_case_bridges_orig_to_folded_only() {
251        // "wi" (folded) ⊂ "Win" (orig, smart-case sensitive): a name
252        // containing "Win" folds to a lower name containing "win" ⊇ "wi".
253        assert!(subsumed("wi", "Win"));
254        assert!(subsumed("Win", "Windows"), "same sensitive domain extends");
255        // The reverse bridge is unsound: lower "win" ⊅ orig "Win".
256        assert!(
257            !subsumed("Win", "win"),
258            "folded next cannot prove orig prev"
259        );
260    }
261
262    #[test]
263    fn filters_narrow_soundly() {
264        assert!(subsumed("ext:rs;txt", "ext:rs"), "ext subset narrows");
265        assert!(!subsumed("ext:d", "ext:dl"), "ext is equality, not prefix");
266        assert!(!subsumed("ext:rs", "ext:rs;txt"), "superset widens");
267        assert!(subsumed("size:>100", "size:>200"));
268        assert!(!subsumed("size:>200", "size:>100"));
269        assert!(subsumed("report", "report file:"), "added term narrows");
270        assert!(!subsumed("report file:", "report"), "dropped term widens");
271    }
272
273    #[test]
274    fn negation_requires_exact_match() {
275        assert!(subsumed("rs !test", "rs !test"));
276        assert!(subsumed("rs", "rs !test"), "added negation narrows");
277        assert!(!subsumed("rs !test", "rs"), "dropped negation widens");
278        assert!(
279            !subsumed("rs !test", "rs !tes"),
280            "¬tes does not imply ¬test"
281        );
282    }
283
284    #[test]
285    fn or_and_option_changes_go_cold() {
286        assert!(!subsumed("a | b", "ab"), "OR prev is out of the v1 algebra");
287        assert!(!subsumed("ab", "ab | cd"));
288        assert!(subsumed("", "ab | cd"), "match-all subsumes even an OR");
289
290        let prev = q("win");
291        let next = q("wind");
292        let base = QueryOptions::default();
293        let desc = QueryOptions { desc: true, ..base };
294        assert!(!subsumes(&prev, &base, &next, &desc), "sort flip");
295        let hidden = QueryOptions {
296            include_hidden_system: true,
297            ..base
298        };
299        assert!(!subsumes(&prev, &base, &next, &hidden), "widening toggle");
300        assert!(subsumes(&prev, &hidden, &next, &base), "narrowing toggle");
301    }
302
303    #[test]
304    fn wildcard_and_path_terms_need_equality() {
305        assert!(subsumed("*.rs", "*.rs"));
306        assert!(subsumed("*.rs", "*.rs main"), "extra term narrows");
307        // ".rs" suffix-implies the substring "rs" — sound and allowed:
308        assert!(subsumed("rs", "*.rs"));
309        // …but a *different* wildcard never implies another one.
310        assert!(!subsumed("*.rsx", "*.rs"));
311        assert!(subsumed(r"path:src", r"path:src main"));
312        assert!(
313            !subsumed(r"path:src", r"path:srcmain"),
314            "path needs equality"
315        );
316    }
317}