1use thiserror::Error;
4
5use super::dates::Civil;
6
7#[derive(Debug, Error, PartialEq, Eq)]
9pub enum ParseError {
10 #[error("unclosed quote")]
12 UnclosedQuote,
13 #[error("invalid size filter `{0}`")]
15 InvalidSize(String),
16 #[error("invalid date filter `{0}`")]
18 InvalidDate(String),
19 #[error("`{0}` cannot be negated")]
21 CannotNegate(String),
22}
23
24#[derive(Debug, Clone, PartialEq)]
26pub enum Term {
27 Name(String),
29 Path(String),
31 Wildcard(String),
33 PathWildcard(String),
35 Regex(String),
37 Ext(Vec<String>),
39 Size {
41 min: u64,
43 max: u64,
45 },
46 Mtime {
48 start: Option<Civil>,
50 end: Option<Civil>,
52 },
53 IsDir(bool),
55 Not(Box<Self>),
57}
58
59#[derive(Debug, Clone, PartialEq)]
61pub struct Ast {
62 pub groups: Vec<Vec<Term>>,
64}
65
66pub fn parse(input: &str) -> Result<Ast, ParseError> {
78 let mut groups: Vec<Vec<Term>> = vec![Vec::new()];
79 let mut rest = input;
80
81 loop {
82 rest = rest.trim_start();
83 if rest.is_empty() {
84 break;
85 }
86 if let Some(r) = rest.strip_prefix('|') {
87 groups.push(Vec::new());
88 rest = r;
89 continue;
90 }
91
92 let mut negated = false;
93 while let Some(r) = rest.strip_prefix('!') {
94 negated = !negated;
95 rest = r;
96 }
97
98 let (atom, r) = read_atom(rest)?;
99 rest = r;
100 if atom.is_empty() {
101 continue;
102 }
103 let terms = terms_from_atom(&atom, negated)?;
104 for t in terms {
105 groups
106 .last_mut()
107 .expect("groups is seeded with one element")
108 .push(t);
109 }
110 }
111
112 Ok(Ast { groups })
113}
114
115fn read_atom(input: &str) -> Result<(String, &str), ParseError> {
118 let mut out = String::new();
119 let mut it = input.char_indices();
120 let mut in_quotes = false;
121 loop {
122 let Some((i, c)) = it.next() else {
123 if in_quotes {
124 return Err(ParseError::UnclosedQuote);
125 }
126 return Ok((out, ""));
127 };
128 match c {
129 '"' => {
130 in_quotes = !in_quotes;
131 out.push(c);
132 }
133 c if !in_quotes && (c.is_whitespace() || c == '|') => {
134 return Ok((out, &input[i..]));
135 }
136 c => out.push(c),
137 }
138 }
139}
140
141fn unquote(s: &str) -> &str {
142 s.strip_prefix('"')
143 .and_then(|s| s.strip_suffix('"'))
144 .unwrap_or(s)
145}
146
147const FIELDS: &[&str] = &["ext", "path", "size", "dm", "regex", "file", "folder"];
148
149fn terms_from_atom(atom: &str, negated: bool) -> Result<Vec<Term>, ParseError> {
150 let wrap = |t: Term| if negated { Term::Not(Box::new(t)) } else { t };
151
152 if !atom.starts_with('"')
154 && let Some(colon) = atom.find(':')
155 {
156 let field = &atom[..colon];
157 let raw_value = &atom[colon + 1..];
158 if FIELDS.contains(&field.to_ascii_lowercase().as_str()) {
159 let value = unquote(raw_value);
160 return match field.to_ascii_lowercase().as_str() {
161 "ext" => {
162 let exts: Vec<String> = value
163 .split([';', ','])
164 .map(|e| e.trim().trim_start_matches('.').to_string())
165 .filter(|e| !e.is_empty())
166 .collect();
167 Ok(vec![wrap(Term::Ext(exts))])
168 }
169 "path" => Ok(vec![wrap(if has_wildcard(value) {
170 Term::PathWildcard(value.to_string())
171 } else {
172 Term::Path(value.to_string())
173 })]),
174 "size" => {
175 let (min, max) = parse_size_range(value)
176 .ok_or_else(|| ParseError::InvalidSize(value.to_string()))?;
177 Ok(vec![wrap(Term::Size { min, max })])
178 }
179 "dm" => {
180 let (start, end) = parse_date_range(value)
181 .ok_or_else(|| ParseError::InvalidDate(value.to_string()))?;
182 Ok(vec![wrap(Term::Mtime { start, end })])
183 }
184 "regex" => Ok(vec![wrap(Term::Regex(unquote(raw_value).to_string()))]),
186 "file" | "folder" => {
187 let is_dir = field.eq_ignore_ascii_case("folder");
188 if value.is_empty() {
189 Ok(vec![wrap(Term::IsDir(is_dir))])
190 } else if negated {
191 Err(ParseError::CannotNegate(format!("{field}:{value}")))
193 } else {
194 Ok(vec![Term::IsDir(is_dir), name_or_path_term(value)])
195 }
196 }
197 _ => unreachable!(),
198 };
199 }
200 }
201
202 Ok(vec![wrap(name_or_path_term(unquote(atom)))])
203}
204
205fn name_or_path_term(value: &str) -> Term {
206 let wild = has_wildcard(value);
207 let pathy = value.contains('\\');
208 match (pathy, wild) {
209 (true, true) => Term::PathWildcard(value.to_string()),
210 (true, false) => Term::Path(value.to_string()),
211 (false, true) => Term::Wildcard(value.to_string()),
212 (false, false) => Term::Name(value.to_string()),
213 }
214}
215
216fn has_wildcard(s: &str) -> bool {
217 s.contains(['*', '?'])
218}
219
220fn parse_size_range(v: &str) -> Option<(u64, u64)> {
223 if let Some((a, b)) = v.split_once("..") {
224 return Some((parse_size(a)?, parse_size(b)?));
225 }
226 if let Some(r) = v.strip_prefix(">=") {
227 return Some((parse_size(r)?, u64::MAX));
228 }
229 if let Some(r) = v.strip_prefix("<=") {
230 return Some((0, parse_size(r)?));
231 }
232 if let Some(r) = v.strip_prefix('>') {
233 return Some((parse_size(r)?.checked_add(1)?, u64::MAX));
234 }
235 if let Some(r) = v.strip_prefix('<') {
236 return Some((0, parse_size(r)?.checked_sub(1)?));
237 }
238 let exact = parse_size(v)?;
239 Some((exact, exact))
240}
241
242fn parse_size(v: &str) -> Option<u64> {
243 let v = v.trim().to_ascii_lowercase();
244 if v.is_empty() {
245 return None;
246 }
247 let (num, mult) = if let Some(n) = v.strip_suffix("kb").or_else(|| v.strip_suffix('k')) {
248 (n, 1u64 << 10)
249 } else if let Some(n) = v.strip_suffix("mb").or_else(|| v.strip_suffix('m')) {
250 (n, 1u64 << 20)
251 } else if let Some(n) = v.strip_suffix("gb").or_else(|| v.strip_suffix('g')) {
252 (n, 1u64 << 30)
253 } else if let Some(n) = v.strip_suffix("tb").or_else(|| v.strip_suffix('t')) {
254 (n, 1u64 << 40)
255 } else if let Some(n) = v.strip_suffix('b') {
256 (n, 1)
257 } else {
258 (v.as_str(), 1)
259 };
260 let num = num.trim();
261 if num.is_empty() {
262 return None;
263 }
264 if let Ok(i) = num.parse::<u64>() {
265 return i.checked_mul(mult);
266 }
267 let f: f64 = num.parse().ok()?;
268 if !f.is_finite() || f < 0.0 {
269 return None;
270 }
271 Some((f * mult as f64) as u64)
272}
273
274fn parse_date_range(v: &str) -> Option<(Option<Civil>, Option<Civil>)> {
277 if let Some((a, b)) = v.split_once("..") {
278 let (sa, _) = parse_date_period(a)?;
279 let (_, eb) = parse_date_period(b)?;
280 return Some((Some(sa), Some(eb)));
281 }
282 if let Some(r) = v.strip_prefix(">=") {
283 let (s, _) = parse_date_period(r)?;
284 return Some((Some(s), None));
285 }
286 if let Some(r) = v.strip_prefix("<=") {
287 let (_, e) = parse_date_period(r)?;
288 return Some((None, Some(e)));
289 }
290 if let Some(r) = v.strip_prefix('>') {
291 let (_, e) = parse_date_period(r)?;
292 return Some((Some(e), None));
293 }
294 if let Some(r) = v.strip_prefix('<') {
295 let (s, _) = parse_date_period(r)?;
296 return Some((None, Some(s)));
297 }
298 let (s, e) = parse_date_period(v)?;
299 Some((Some(s), Some(e)))
300}
301
302fn parse_date_period(v: &str) -> Option<(Civil, Civil)> {
304 let parts: Vec<&str> = v.trim().split(['-', '/']).collect();
305 let nums: Vec<u32> = parts
306 .iter()
307 .map(|p| p.parse::<u32>().ok())
308 .collect::<Option<_>>()?;
309 let &year = nums.first()?;
318 if !(1601..=9999).contains(&year) {
319 return None;
320 }
321 match nums.as_slice() {
322 [y] => {
323 let start = Civil {
324 y: *y as i32,
325 m: 1,
326 d: 1,
327 };
328 let end = Civil {
329 y: *y as i32 + 1,
330 m: 1,
331 d: 1,
332 };
333 (start.is_valid() && end.is_valid()).then_some((start, end))
334 }
335 [y, m] => {
336 let start = Civil {
337 y: *y as i32,
338 m: *m,
339 d: 1,
340 };
341 start
342 .is_valid()
343 .then_some((start, start.first_of_next_month()))
344 }
345 [y, m, d] => {
346 let start = Civil {
347 y: *y as i32,
348 m: *m,
349 d: *d,
350 };
351 start.is_valid().then_some((start, start.next_day()))
352 }
353 _ => None,
354 }
355}
356
357#[cfg(test)]
358mod tests {
359 use super::*;
360
361 fn p(s: &str) -> Ast {
362 parse(s).unwrap()
363 }
364
365 #[test]
366 fn plain_words_are_anded() {
367 assert_eq!(
368 p("foo bar").groups,
369 vec![vec![Term::Name("foo".into()), Term::Name("bar".into())]]
370 );
371 }
372
373 #[test]
374 fn or_splits_groups() {
375 assert_eq!(
376 p("foo | bar baz").groups,
377 vec![
378 vec![Term::Name("foo".into())],
379 vec![Term::Name("bar".into()), Term::Name("baz".into())],
380 ]
381 );
382 }
383
384 #[test]
385 fn negation_and_double_negation() {
386 assert_eq!(
387 p("!tmp").groups[0][0],
388 Term::Not(Box::new(Term::Name("tmp".into())))
389 );
390 assert_eq!(p("!!tmp").groups[0][0], Term::Name("tmp".into()));
391 }
392
393 #[test]
394 fn phrase_keeps_spaces() {
395 assert_eq!(
396 p(r#""two words""#).groups[0][0],
397 Term::Name("two words".into())
398 );
399 }
400
401 #[test]
402 fn backslash_switches_to_path() {
403 assert_eq!(
404 p(r"docs\reports").groups[0][0],
405 Term::Path(r"docs\reports".into())
406 );
407 }
408
409 #[test]
410 fn wildcards_detected() {
411 assert_eq!(p("*.rs").groups[0][0], Term::Wildcard("*.rs".into()));
412 assert_eq!(
413 p(r"src\*.rs").groups[0][0],
414 Term::PathWildcard(r"src\*.rs".into())
415 );
416 }
417
418 #[test]
419 fn ext_filter_splits_csv() {
420 assert_eq!(
421 p("ext:jpg;png,.gif").groups[0][0],
422 Term::Ext(vec!["jpg".into(), "png".into(), "gif".into()])
423 );
424 }
425
426 #[test]
427 fn dm_out_of_range_year_is_rejected_without_overflow() {
428 for v in [
435 "dm:2147483647",
436 "dm:2147483647-12",
437 "dm:2147483648-1-1",
438 "dm:2020..2147483647",
439 "dm:4294967295",
440 "dm:99999999",
441 ] {
442 assert!(
443 matches!(parse(v), Err(ParseError::InvalidDate(_))),
444 "{v} should be rejected as InvalidDate, not panic",
445 );
446 }
447 assert!(matches!(p("dm:2024").groups[0][0], Term::Mtime { .. }));
449 }
450
451 #[test]
452 fn quoted_filter_value() {
453 assert_eq!(
454 p(r#"path:"Program Files""#).groups[0][0],
455 Term::Path("Program Files".into())
456 );
457 }
458
459 #[test]
460 fn size_ranges() {
461 assert_eq!(
462 p("size:>1kb").groups[0][0],
463 Term::Size {
464 min: 1025,
465 max: u64::MAX
466 }
467 );
468 assert_eq!(
469 p("size:1kb..1mb").groups[0][0],
470 Term::Size {
471 min: 1024,
472 max: 1 << 20
473 }
474 );
475 assert_eq!(
476 p("size:1.5kb").groups[0][0],
477 Term::Size {
478 min: 1536,
479 max: 1536
480 }
481 );
482 assert!(parse("size:abc").is_err());
483 }
484
485 #[test]
486 fn date_ranges() {
487 let y2024 = Civil {
488 y: 2024,
489 m: 1,
490 d: 1,
491 };
492 let y2025 = Civil {
493 y: 2025,
494 m: 1,
495 d: 1,
496 };
497 assert_eq!(
498 p("dm:2024").groups[0][0],
499 Term::Mtime {
500 start: Some(y2024),
501 end: Some(y2025)
502 }
503 );
504 assert_eq!(
505 p("dm:>=2024-03").groups[0][0],
506 Term::Mtime {
507 start: Some(Civil {
508 y: 2024,
509 m: 3,
510 d: 1
511 }),
512 end: None
513 }
514 );
515 assert_eq!(
516 p("dm:2024/02/28..2024/02/29").groups[0][0],
517 Term::Mtime {
518 start: Some(Civil {
519 y: 2024,
520 m: 2,
521 d: 28
522 }),
523 end: Some(Civil {
524 y: 2024,
525 m: 3,
526 d: 1
527 }),
528 }
529 );
530 assert!(parse("dm:2023-02-29").is_err());
531 }
532
533 #[test]
534 fn folder_with_value_expands() {
535 assert_eq!(
536 p("folder:src").groups[0],
537 vec![Term::IsDir(true), Term::Name("src".into())]
538 );
539 assert!(parse("!folder:src").is_err());
540 assert_eq!(
541 p("!folder:").groups[0][0],
542 Term::Not(Box::new(Term::IsDir(true)))
543 );
544 }
545
546 #[test]
547 fn unknown_colon_atom_is_a_name() {
548 assert_eq!(p("12:30").groups[0][0], Term::Name("12:30".into()));
549 }
550
551 #[test]
552 fn unclosed_quote_errors() {
553 assert_eq!(parse(r#""abc"#), Err(ParseError::UnclosedQuote));
554 }
555
556 #[test]
557 fn empty_query_is_match_all() {
558 assert_eq!(p("").groups, vec![Vec::<Term>::new()]);
559 }
560}
561
562#[cfg(test)]
563mod proptests {
564 use proptest::{prop_assert, proptest};
565
566 use super::parse;
567
568 proptest! {
569 #[test]
574 fn parse_never_panics(s in ".*") {
575 if let Ok(ast) = parse(&s) {
576 prop_assert!(!ast.groups.is_empty());
577 }
578 }
579
580 #[test]
584 fn parse_never_panics_on_query_alphabet(
585 s in "[a-zA-Z0-9 |!\"*?:;.<>=,/\\\\-]{0,40}"
586 ) {
587 if let Ok(ast) = parse(&s) {
588 prop_assert!(!ast.groups.is_empty());
589 }
590 }
591 }
592}