// aozora_syntax/borrowed/arena.rs
1//! Per-document bump arena.
2//!
3//! Wraps [`bumpalo::Bump`] with the AST-friendly subset of allocation
4//! primitives the lex / render / parallel layers need. The arena is
5//! consumed by [`super::Arena::alloc`] and friends; references it
6//! returns are valid for the borrow of `&self`, which downstream
7//! consumers re-export as the AST's `'src` lifetime.
8//!
9//! ## Why bumpalo
10//!
//! - Allocate-only: parses produce trees, never mutate them in place.
//!   Bump's drop-everything-at-once model matches that exactly.
//! - Single-threaded by default; parallel parse paths use one arena
//!   per worker, then merge.
15
16use bumpalo::Bump;
17
/// Bump-allocator arena owning all AST node storage for a single
/// parse.
///
/// Methods that allocate return references whose lifetime is tied to
/// `&self`; consumers commonly re-bind that lifetime as the parsed
/// tree's `'src` parameter.
#[derive(Debug, Default)]
pub struct Arena {
    // Sole backing allocator. Every `alloc*` method delegates here;
    // `reset` / `reset_with_hint` clear or replace it wholesale.
    bump: Bump,
}
28
29impl Arena {
30 /// Empty arena.
31 #[must_use]
32 pub fn new() -> Self {
33 Self { bump: Bump::new() }
34 }
35
36 /// Empty arena with at least `capacity` bytes pre-reserved. Use
37 /// when the source size is known to be large — e.g., the lex
38 /// driver might call `Arena::with_capacity(source.len() / 4)` to
39 /// avoid early growth allocations on a multi-MB document.
40 #[must_use]
41 pub fn with_capacity(capacity: usize) -> Self {
42 Self {
43 bump: Bump::with_capacity(capacity),
44 }
45 }
46
47 /// Allocate `value` in the arena, returning a borrowed reference
48 /// that is valid for `&self`'s lifetime.
49 pub fn alloc<T>(&self, value: T) -> &T {
50 self.bump.alloc(value)
51 }
52
53 /// Allocate a copy of `s` in the arena. Used when the lex layer
54 /// produces a new (synthesised or rewritten) string that does not
55 /// directly point into the source buffer.
56 pub fn alloc_str(&self, s: &str) -> &str {
57 self.bump.alloc_str(s)
58 }
59
60 /// Allocate a slice copy of `slice` in the arena. Restricted to
61 /// `Copy` types because the borrowed AST contains only `Copy`
62 /// data (refs, primitives, `Copy` enums) — see the `borrowed`
63 /// module docs.
64 pub fn alloc_slice_copy<T: Copy>(&self, slice: &[T]) -> &[T] {
65 self.bump.alloc_slice_copy(slice)
66 }
67
68 /// Allocate a slice from an iterator. Useful for assembling a
69 /// `Content::Segments` payload from a builder loop.
70 pub fn alloc_slice_fill_iter<T, I>(&self, iter: I) -> &[T]
71 where
72 I: IntoIterator<Item = T>,
73 I::IntoIter: ExactSizeIterator,
74 {
75 self.bump.alloc_slice_fill_iter(iter)
76 }
77
78 /// Bytes currently allocated to chunks (committed memory). For
79 /// diagnostic / benchmarking use only.
80 #[must_use]
81 pub fn allocated_bytes(&self) -> usize {
82 self.bump.allocated_bytes()
83 }
84
85 /// Borrow the inner [`Bump`] allocator. Used by structures that
86 /// need to hold their own arena-backed storage (e.g. the
87 /// [`super::Interner`]'s probe table, which is itself a
88 /// `BumpVec` allocated inside the arena).
89 #[must_use]
90 pub fn bump(&self) -> &Bump {
91 &self.bump
92 }
93
94 /// Drop every allocation without releasing the underlying chunks.
95 /// The next `alloc*` call reuses the same memory pages — saving
96 /// the `mmap` syscall a fresh [`Arena::new`] would pay.
97 ///
98 /// `&mut self` enforces at compile time that no live borrow into
99 /// the arena exists at reset time: every `alloc`-returned `&T`
100 /// borrows from `&self`, so a caller holding such a reference
101 /// can never simultaneously call `&mut self`. Trying to do so is
102 /// a borrow-checker error, not a runtime UAF.
103 ///
104 /// Used by long-running workers (rayon parallel corpus sweep, the
105 /// LSP daemon, etc.) that parse many documents in succession and
106 /// would otherwise pay one `mmap` per parse.
107 pub fn reset(&mut self) {
108 self.bump.reset();
109 }
110
111 /// Reset and pre-size: drop every allocation, then ensure the
112 /// retained chunk capacity is at least `target_capacity` bytes
113 /// before returning.
114 ///
115 /// Behaviour:
116 /// - When the arena's existing chunk capacity already meets the
117 /// target, this is identical to [`Arena::reset`] — no syscall,
118 /// no fresh allocation.
119 /// - When the target exceeds current capacity, the underlying
120 /// bump is replaced with a freshly-allocated one of at least
121 /// `target_capacity` bytes. The previous chunks are released
122 /// to the system allocator; the new bump mmaps one chunk at
123 /// the requested size.
124 ///
125 /// The replace path costs one `mmap` per *growth event*, not per
126 /// parse. Steady-state workloads (corpus sweep on similar-sized
127 /// docs) hit the no-op fast path after the first parse on each
128 /// worker thread; only docs whose AST exceeds the high-water mark
129 /// pay the syscall. Compared to plain [`Arena::reset`] +
130 /// chunk-grow-on-demand, the cost is identical (same number of
131 /// mmaps) but moved out of the parse hot path: the syscall fires
132 /// before `lex_into_arena` rather than inside it, removing one
133 /// source of intra-parse latency variance.
134 ///
135 /// Used by long-running workers (rayon corpus sweep, LSP daemon)
136 /// that have a per-source size hint available — typically
137 /// `source.len() * 4` for the borrowed AST shape.
138 pub fn reset_with_hint(&mut self, target_capacity: usize) {
139 if self.bump.allocated_bytes() >= target_capacity {
140 self.bump.reset();
141 } else {
142 self.bump = Bump::with_capacity(target_capacity);
143 }
144 }
145}
146
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn new_arena_is_empty() {
        let arena = Arena::new();
        // Chunk accounting is a bumpalo internal; the only thing we
        // pin here is that the accessor is callable on a fresh arena
        // without panicking. The value itself is deliberately unused.
        let _bytes = arena.allocated_bytes();
    }

    #[test]
    fn alloc_returns_reference_with_arena_lifetime() {
        let arena = Arena::new();
        let value: &u32 = arena.alloc(42u32);
        assert_eq!(*value, 42);
    }

    #[test]
    fn alloc_str_copies_into_arena() {
        let arena = Arena::new();
        assert_eq!(arena.alloc_str("hello"), "hello");
    }

    #[test]
    fn alloc_slice_copy_preserves_contents() {
        let arena = Arena::new();
        let copied = arena.alloc_slice_copy(&[1u32, 2, 3, 4, 5]);
        assert_eq!(copied, &[1, 2, 3, 4, 5]);
    }

    #[test]
    fn alloc_slice_fill_iter_handles_known_length() {
        let arena = Arena::new();
        let filled = arena.alloc_slice_fill_iter([10u32, 20, 30]);
        assert_eq!(filled, &[10, 20, 30]);
    }

    #[test]
    fn with_capacity_preallocates_some_chunk() {
        // Exact sizing is bumpalo's business; a pre-sized arena must
        // at least report a non-zero committed chunk.
        let arena = Arena::with_capacity(4096);
        assert!(arena.allocated_bytes() > 0);
    }

    #[test]
    fn many_small_allocations_share_arena() {
        let arena = Arena::new();
        // One thousand tiny values must all coexist: no aliasing, no
        // drops. Collecting the references keeps them live together.
        let refs: Vec<&u32> = (0..1000u32).map(|i| arena.alloc(i)).collect();
        for (index, slot) in refs.iter().enumerate() {
            let expected = u32::try_from(index).expect("loop bound fits in u32");
            assert_eq!(**slot, expected);
        }
    }

    #[test]
    fn reset_with_hint_grows_when_target_exceeds_current_capacity() {
        let mut arena = Arena::with_capacity(4096);
        let initial = arena.allocated_bytes();
        // Ask for 10x the current footprint (and at least 64 KiB):
        // the bump must be swapped for one big enough for the hint.
        let target = initial.saturating_mul(10).max(64 * 1024);
        arena.reset_with_hint(target);
        let after = arena.allocated_bytes();
        assert!(
            after >= target,
            "capacity must grow to at least target (target={target}, after={after})"
        );

        // The arena must remain usable after taking the grow path.
        let probe: &u32 = arena.alloc(7u32);
        assert_eq!(*probe, 7);
    }

    #[test]
    fn reset_with_hint_is_a_plain_reset_when_target_already_met() {
        // A large pre-sized arena handed a small hint must keep its
        // chunks: bumpalo's reset retains capacity, and a hint below
        // current capacity takes the plain-reset branch.
        let mut arena = Arena::with_capacity(64 * 1024);
        for value in 0..256u32 {
            let _ = arena.alloc(value);
        }
        let initial = arena.allocated_bytes();
        arena.reset_with_hint(1024);
        assert_eq!(
            arena.allocated_bytes(),
            initial,
            "small-target hint must not shrink the arena"
        );
    }

    #[test]
    fn reset_drops_allocations_but_keeps_capacity() {
        let mut arena = Arena::with_capacity(4096);
        // Fill enough bytes that the first chunk definitely opens.
        // References are discarded immediately — `reset(&mut self)`
        // guarantees none survive to the reset call.
        for value in 0..256u32 {
            let _ = arena.alloc(value);
            let _ = arena.alloc_str("filler");
        }
        let before = arena.allocated_bytes();
        assert!(before > 0, "fill loop must have allocated something");

        arena.reset();

        // Chunks are retained across reset so later allocations skip
        // the mmap. The accounting can shift (bumpalo keeps only its
        // largest chunk), so the contract pinned here is "no large
        // shrink", not an exact byte count.
        let after = arena.allocated_bytes();
        assert!(
            after >= before / 2,
            "reset should retain at least half the previous capacity (before={before}, after={after})"
        );

        // A fresh allocation still works and returns a valid ref.
        let probe: &u32 = arena.alloc(99u32);
        assert_eq!(*probe, 99);
    }
}
278}