1use parking_lot::Mutex;
9
10use super::{EntryId, VolumeIndex, flags};
11
12const SNAPSHOT_MAGIC: &[u8; 8] = b"FMFIDX04";
16
17const fn pod_bytes<T: Copy>(v: &[T]) -> &[u8] {
18 unsafe { std::slice::from_raw_parts(v.as_ptr().cast::<u8>(), std::mem::size_of_val(v)) }
20}
21
22fn write_vec<T: Copy, W: std::io::Write>(
23 w: &mut W,
24 h: &mut xxhash_rust::xxh64::Xxh64,
25 v: &[T],
26) -> std::io::Result<()> {
27 let bytes = pod_bytes(v);
28 let len = (bytes.len() as u64).to_le_bytes();
29 h.update(&len);
30 w.write_all(&len)?;
31 h.update(bytes);
32 w.write_all(bytes)
33}
34
35fn read_vec<T: Copy, R: std::io::Read>(
36 r: &mut R,
37 h: &mut xxhash_rust::xxh64::Xxh64,
38) -> std::io::Result<Vec<T>> {
39 use std::io::{Error, ErrorKind};
40 let mut len8 = [0u8; 8];
41 r.read_exact(&mut len8)?;
42 h.update(&len8);
43 let len = u64::from_le_bytes(len8) as usize;
44 let elem = std::mem::size_of::<T>();
45 if !len.is_multiple_of(elem) {
46 return Err(Error::new(ErrorKind::InvalidData, "section size mismatch"));
47 }
48 let elems = len / elem;
53 let mut out: Vec<T> = Vec::new();
54 if out.try_reserve_exact(elems).is_err() {
55 return Err(Error::new(ErrorKind::InvalidData, "section size overflow"));
56 }
57 let bytes = unsafe { std::slice::from_raw_parts_mut(out.as_mut_ptr().cast::<u8>(), len) };
61 for chunk in bytes.chunks_mut(8 << 20) {
62 r.read_exact(chunk)?;
63 h.update(chunk);
64 }
65 unsafe { out.set_len(elems) };
66 Ok(out)
67}
68
69impl VolumeIndex {
70 pub fn write_snapshot<W: std::io::Write>(
76 &self,
77 w: &mut W,
78 journal_id: u64,
79 next_usn: i64,
80 ) -> std::io::Result<()> {
81 let mut h = xxhash_rust::xxh64::Xxh64::new(0);
82 let mut head = Vec::with_capacity(32);
83 head.extend_from_slice(SNAPSHOT_MAGIC);
84 head.extend_from_slice(&journal_id.to_le_bytes());
85 head.extend_from_slice(&next_usn.to_le_bytes());
86 head.extend_from_slice(&(self.len() as u64).to_le_bytes());
87 h.update(&head);
88 w.write_all(&head)?;
89
90 write_vec(w, &mut h, &self.lower_pool)?;
91 write_vec(w, &mut h, &self.orig_pool)?;
92 write_vec(w, &mut h, &self.orig_off)?;
93 write_vec(w, &mut h, &self.name_off)?;
94 write_vec(w, &mut h, &self.name_len)?;
95 write_vec(w, &mut h, &self.parent)?;
96 write_vec(w, &mut h, &self.size_lo)?;
97 let mut ovf: Vec<(EntryId, u64)> = self.size_ovf.iter().map(|(&k, &v)| (k, v)).collect();
100 ovf.sort_unstable();
101 let ovf_ids: Vec<u32> = ovf.iter().map(|p| p.0).collect();
102 let ovf_sizes: Vec<u64> = ovf.iter().map(|p| p.1).collect();
103 write_vec(w, &mut h, &ovf_ids)?;
104 write_vec(w, &mut h, &ovf_sizes)?;
105 write_vec(w, &mut h, &self.mtime)?;
106 write_vec(w, &mut h, &self.frn)?;
107 write_vec(w, &mut h, &self.flag)?;
108 write_vec(w, &mut h, &self.perm_name)?;
109 w.write_all(&h.digest().to_le_bytes())
110 }
111
112 pub fn read_snapshot<R: std::io::Read>(r: &mut R) -> std::io::Result<(Self, u64, i64)> {
128 use std::io::{Error, ErrorKind};
129 let bad = |m: &str| Error::new(ErrorKind::InvalidData, m.to_string());
130
131 let mut h = xxhash_rust::xxh64::Xxh64::new(0);
132 let mut head = [0u8; 32];
133 r.read_exact(&mut head)?;
134 if &head[..8] != SNAPSHOT_MAGIC {
135 return Err(bad("bad magic"));
136 }
137 h.update(&head);
138 let journal_id = u64::from_le_bytes(head[8..16].try_into().expect("32-byte header"));
139 let next_usn = i64::from_le_bytes(head[16..24].try_into().expect("32-byte header"));
140 let count = u64::from_le_bytes(head[24..32].try_into().expect("32-byte header")) as usize;
141
142 let lower_pool: Vec<u8> = read_vec(r, &mut h)?;
143 let orig_pool: Vec<u8> = read_vec(r, &mut h)?;
144 let orig_off: Vec<u32> = read_vec(r, &mut h)?;
145 let name_off: Vec<u32> = read_vec(r, &mut h)?;
146 let name_len: Vec<u16> = read_vec(r, &mut h)?;
147 let parent: Vec<u32> = read_vec(r, &mut h)?;
148 let size_lo: Vec<u32> = read_vec(r, &mut h)?;
149 let ovf_ids: Vec<u32> = read_vec(r, &mut h)?;
150 let ovf_sizes: Vec<u64> = read_vec(r, &mut h)?;
151 let mtime: Vec<i64> = read_vec(r, &mut h)?;
152 let frn: Vec<u64> = read_vec(r, &mut h)?;
153 let flag: Vec<u8> = read_vec(r, &mut h)?;
154 let perm_name: Vec<u32> = read_vec(r, &mut h)?;
155
156 let mut digest = [0u8; 8];
157 r.read_exact(&mut digest)?;
158 if u64::from_le_bytes(digest) != h.digest() {
159 return Err(bad("checksum mismatch"));
160 }
161 let columns_ok = [
162 orig_off.len(),
163 name_off.len(),
164 name_len.len(),
165 parent.len(),
166 size_lo.len(),
167 mtime.len(),
168 frn.len(),
169 flag.len(),
170 perm_name.len(),
171 ]
172 .iter()
173 .all(|&l| l == count);
174 if !columns_ok {
175 return Err(bad("column length mismatch"));
176 }
177 for i in 0..count {
181 let len = name_len[i] as usize;
182 let lower_ok = (name_off[i] as usize)
183 .checked_add(len)
184 .is_some_and(|end| end <= lower_pool.len());
185 let orig_ok = match orig_off[i] {
186 u32::MAX => true,
187 off => (off as usize)
188 .checked_add(len)
189 .is_some_and(|end| end <= orig_pool.len()),
190 };
191 if !lower_ok || !orig_ok {
192 return Err(bad("name slice out of pool bounds"));
193 }
194 }
195 if ovf_ids.len() != ovf_sizes.len() {
200 return Err(bad("size overflow length mismatch"));
201 }
202 let sentinels = size_lo.iter().filter(|&&v| v == u32::MAX).count();
203 if sentinels != ovf_ids.len() {
204 return Err(bad("size overflow sentinel mismatch"));
205 }
206 for (i, (&id, &sz)) in ovf_ids.iter().zip(&ovf_sizes).enumerate() {
207 let ascending = i == 0 || ovf_ids[i - 1] < id;
208 if !ascending
209 || (id as usize) >= count
210 || size_lo[id as usize] != u32::MAX
211 || sz < u32::MAX as u64
212 {
213 return Err(bad("size overflow pair invalid"));
214 }
215 }
216 let size_ovf: rustc_hash::FxHashMap<u32, u64> =
217 ovf_ids.into_iter().zip(ovf_sizes).collect();
218
219 let frn_index = super::frn::FrnIndex::build(&frn, &flag);
220 let mut tombstones = 0u32;
221 let mut dead_name_bytes = 0u64;
223 for (i, f) in flag.iter().enumerate() {
224 if f & flags::TOMBSTONE != 0 {
225 tombstones += 1;
226 let len = name_len[i] as u64;
227 dead_name_bytes += if orig_off[i] == u32::MAX {
228 len
229 } else {
230 len * 2
231 };
232 }
233 }
234
235 Ok((
236 Self {
237 lower_pool,
238 orig_pool,
239 orig_off,
240 name_off,
241 name_len,
242 parent,
243 size_lo,
244 size_ovf,
245 mtime,
246 frn,
247 flag,
248 frn_index,
249 perm_name,
250 content_generation: 0,
251 structural_generation: 0,
252 dir_topology_generation: 0,
253 tombstones,
254 dead_name_bytes,
255 derived_cache: Mutex::new(None),
256 },
257 journal_id,
258 next_usn,
259 ))
260 }
261
262 pub fn save_to(
269 &self,
270 path: &std::path::Path,
271 journal_id: u64,
272 next_usn: i64,
273 ) -> std::io::Result<()> {
274 use std::io::Write;
275
276 let tmp = path.with_extension("fmfidx.tmp");
277 if let Some(dir) = path.parent() {
278 std::fs::create_dir_all(dir)?;
279 }
280 {
281 let mut w = std::io::BufWriter::new(std::fs::File::create(&tmp)?);
282 self.write_snapshot(&mut w, journal_id, next_usn)?;
283 w.flush()?;
284 }
285 std::fs::rename(&tmp, path)
286 }
287
288 pub fn load_from(path: &std::path::Path) -> std::io::Result<(Self, u64, i64)> {
295 let mut r = std::io::BufReader::new(std::fs::File::open(path)?);
296 Self::read_snapshot(&mut r)
297 }
298}
299
300#[cfg(test)]
301mod tests {
302 use super::*;
303
304 use crate::index::testutil::{TestDir, build_sample};
305
306 #[test]
307 fn snapshot_roundtrip_preserves_everything() {
308 let mut idx = build_sample();
309 idx.delete(60); let first_new = idx.len() as u32;
312 let huge = crate::index::testutil::u16s("huge.iso");
313 let huge_id = idx.upsert(&crate::index::testutil::raw(
314 777,
315 50,
316 &huge,
317 false,
318 (5u64 << 30) + 123,
319 1,
320 ));
321 idx.merge_new_into_permutations(first_new);
322
323 let mut buf = Vec::new();
324 idx.write_snapshot(&mut buf, 0xDEAD_BEEF_u64, 12345)
325 .unwrap();
326 let (loaded, journal_id, next_usn) =
327 VolumeIndex::read_snapshot(&mut buf.as_slice()).unwrap();
328
329 assert_eq!(journal_id, 0xDEAD_BEEF_u64);
330 assert_eq!(next_usn, 12345);
331 assert_eq!(loaded.len(), idx.len());
332 assert_eq!(loaded.live_len(), idx.live_len());
333 assert_eq!(loaded.entry_by_record(60), None);
335 let note = loaded.entry_by_record(100).unwrap();
336 let mut p = Vec::new();
337 loaded.append_path(note, &mut p);
338 assert_eq!(p, b"C:\\docs\\Note.TXT");
339 assert_eq!(loaded.name_permutation(), idx.name_permutation());
340 assert_eq!(loaded.size(huge_id), (5u64 << 30) + 123);
341 }
342
343 #[test]
344 fn snapshot_size_overflow_inconsistencies_are_rejected() {
345 let mut sections = valid_sections();
347 sections[7] = vec![0u8; 4]; sections[8] = 0x00FF_FFFF_FFFF_u64.to_le_bytes().to_vec();
349 let err = read_crafted(sections);
350 assert!(err.to_string().contains("sentinel mismatch"), "{err}");
351
352 let mut sections = valid_sections();
354 sections[6] = u32::MAX.to_le_bytes().to_vec();
355 let err = read_crafted(sections);
356 assert!(err.to_string().contains("sentinel mismatch"), "{err}");
357
358 let mut sections = valid_sections();
360 sections[6] = u32::MAX.to_le_bytes().to_vec();
361 sections[7] = vec![0u8; 4];
362 sections[8] = 42u64.to_le_bytes().to_vec();
363 let err = read_crafted(sections);
364 assert!(err.to_string().contains("pair invalid"), "{err}");
365
366 let mut sections = valid_sections();
368 sections[6] = u32::MAX.to_le_bytes().to_vec();
369 sections[7] = vec![0u8; 4];
370 let err = read_crafted(sections);
371 assert!(err.to_string().contains("length mismatch"), "{err}");
372 }
373
374 #[test]
375 fn snapshot_corruption_is_detected() {
376 let idx = build_sample();
377 let mut buf = Vec::new();
378 idx.write_snapshot(&mut buf, 1, 1).unwrap();
379 let mid = buf.len() / 2;
380 buf[mid] ^= 0xFF;
381 assert!(VolumeIndex::read_snapshot(&mut buf.as_slice()).is_err());
382
383 let mut truncated = Vec::new();
384 idx.write_snapshot(&mut truncated, 1, 1).unwrap();
385 truncated.truncate(truncated.len() - 3);
386 assert!(VolumeIndex::read_snapshot(&mut truncated.as_slice()).is_err());
387 }
388
389 fn craft_stream(count: u64, sections: &[&[u8]]) -> Vec<u8> {
392 let mut h = xxhash_rust::xxh64::Xxh64::new(0);
393 let mut buf = Vec::new();
394 buf.extend_from_slice(SNAPSHOT_MAGIC);
395 buf.extend_from_slice(&1u64.to_le_bytes());
396 buf.extend_from_slice(&2i64.to_le_bytes());
397 buf.extend_from_slice(&count.to_le_bytes());
398 h.update(&buf);
399 for s in sections {
400 write_vec(&mut buf, &mut h, s).unwrap();
401 }
402 let digest = h.digest().to_le_bytes();
403 buf.extend_from_slice(&digest);
404 buf
405 }
406
407 fn valid_sections() -> Vec<Vec<u8>> {
412 vec![
413 b"a".to_vec(), Vec::new(), u32::MAX.to_le_bytes().to_vec(), vec![0u8; 4], 1u16.to_le_bytes().to_vec(), vec![0u8; 4], vec![0u8; 4], Vec::new(), Vec::new(), vec![0u8; 8], vec![0u8; 8], vec![0u8; 1], vec![0u8; 4], ]
427 }
428
429 fn expect_load_err(buf: &[u8]) -> std::io::Error {
431 match VolumeIndex::read_snapshot(&mut &*buf) {
432 Ok(_) => panic!("corrupted snapshot must not load"),
433 Err(e) => e,
434 }
435 }
436
437 fn read_crafted(sections: Vec<Vec<u8>>) -> std::io::Error {
438 let refs: Vec<&[u8]> = sections.iter().map(Vec::as_slice).collect();
439 expect_load_err(&craft_stream(1, &refs))
440 }
441
442 #[test]
443 fn snapshot_wrong_magic_is_rejected() {
444 let idx = build_sample();
445 let mut buf = Vec::new();
446 idx.write_snapshot(&mut buf, 1, 1).unwrap();
447 buf[..8].copy_from_slice(b"FMFIDX01"); let err = expect_load_err(&buf);
449 assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
450 assert!(err.to_string().contains("magic"), "{err}");
451 }
452
453 #[test]
454 fn snapshot_crafted_stream_sanity_loads() {
455 let refs: Vec<Vec<u8>> = valid_sections();
458 let refs: Vec<&[u8]> = refs.iter().map(Vec::as_slice).collect();
459 let buf = craft_stream(1, &refs);
460 let (idx, journal_id, next_usn) = VolumeIndex::read_snapshot(&mut buf.as_slice()).unwrap();
461 assert_eq!((journal_id, next_usn), (1, 2));
462 assert_eq!(idx.len(), 1);
463 }
464
465 #[test]
466 fn snapshot_column_count_mismatch_is_rejected_not_panic() {
467 let mut sections = valid_sections();
469 sections[3] = vec![0u8; 8];
470 let err = read_crafted(sections);
471 assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
472 assert!(err.to_string().contains("column length"), "{err}");
473 }
474
475 #[test]
476 fn snapshot_misaligned_section_is_rejected_not_panic() {
477 let mut sections = valid_sections();
479 sections[3] = vec![0u8; 7];
480 let err = read_crafted(sections);
481 assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
482 assert!(err.to_string().contains("section size"), "{err}");
483 }
484
485 #[test]
486 fn snapshot_out_of_bounds_name_slices_are_rejected() {
487 let mut sections = valid_sections();
489 sections[4] = 2u16.to_le_bytes().to_vec();
490 let err = read_crafted(sections);
491 assert_eq!(err.kind(), std::io::ErrorKind::InvalidData);
492 assert!(err.to_string().contains("out of pool bounds"), "{err}");
493
494 let mut sections = valid_sections();
496 sections[2] = 0u32.to_le_bytes().to_vec();
497 let err = read_crafted(sections);
498 assert!(err.to_string().contains("out of pool bounds"), "{err}");
499
500 let mut sections = valid_sections();
502 sections[1] = b"A".to_vec();
503 sections[2] = 0u32.to_le_bytes().to_vec();
504 let refs: Vec<&[u8]> = sections.iter().map(Vec::as_slice).collect();
505 let buf = craft_stream(1, &refs);
506 let (idx, _, _) = VolumeIndex::read_snapshot(&mut buf.as_slice()).unwrap();
507 assert_eq!(idx.name(0), b"A");
508 assert_eq!(idx.lower_name(0), b"a");
509 }
510
511 #[test]
512 fn snapshot_lying_length_prefix_errors_without_huge_allocation() {
513 let idx = build_sample();
516 let mut buf = Vec::new();
517 idx.write_snapshot(&mut buf, 1, 1).unwrap();
518 buf[32..40].copy_from_slice(&(1u64 << 60).to_le_bytes());
519 assert!(VolumeIndex::read_snapshot(&mut buf.as_slice()).is_err());
520 }
521
522 #[test]
523 fn truncated_snapshot_errors_at_every_cut_point() {
524 let mut idx = build_sample();
525 idx.delete(60); let mut buf = Vec::new();
527 idx.write_snapshot(&mut buf, 1, 1).unwrap();
528 assert!(VolumeIndex::read_snapshot(&mut buf.as_slice()).is_ok());
529 for cut in 0..buf.len() {
530 assert!(
531 VolumeIndex::read_snapshot(&mut &buf[..cut]).is_err(),
532 "cut at {cut} must error, not panic or succeed"
533 );
534 }
535 }
536
537 #[test]
538 fn save_to_leaves_no_tmp_file_and_overwrites() {
539 let dir = TestDir::new();
540 let target = dir.join("vol_c.fmfidx");
541 let idx = build_sample();
542 idx.save_to(&target, 7, 8).unwrap();
543 idx.save_to(&target, 9, 10).unwrap(); let names: Vec<String> = std::fs::read_dir(dir.path())
546 .unwrap()
547 .map(|e| e.unwrap().file_name().to_string_lossy().into_owned())
548 .collect();
549 assert_eq!(names, vec!["vol_c.fmfidx"], "no .tmp or stray files");
550
551 let (loaded, journal_id, next_usn) = VolumeIndex::load_from(&target).unwrap();
552 assert_eq!((journal_id, next_usn), (9, 10));
553 assert_eq!(loaded.len(), idx.len());
554 }
555}