aozora_syntax/borrowed/arena.rs

//! Per-document bump arena.
//!
//! Wraps [`bumpalo::Bump`] with the AST-friendly subset of allocation
//! primitives the lex / render / parallel layers need. The arena is
//! consumed by [`super::Arena::alloc`] and friends; references it
//! returns are valid for the borrow of `&self`, which downstream
//! consumers re-export as the AST's `'src` lifetime.
//!
//! ## Why bumpalo
//!
//! - Allocate-only: parses produce trees, never mutate them in place.
//!   Bump's drop-everything-at-once model matches that exactly.
//! - Single-threaded by default; parallel parse paths use one arena
//!   per worker, then merge.
use bumpalo::Bump;
17
18/// Bump-allocator arena owning all AST node storage for a single
19/// parse.
20///
21/// Methods that allocate return references whose lifetime is tied to
22/// `&self`; consumers commonly re-bind that lifetime as the parsed
23/// tree's `'src` parameter.
24#[derive(Debug, Default)]
25pub struct Arena {
26    bump: Bump,
27}
28
29impl Arena {
30    /// Empty arena.
31    #[must_use]
32    pub fn new() -> Self {
33        Self { bump: Bump::new() }
34    }
35
36    /// Empty arena with at least `capacity` bytes pre-reserved. Use
37    /// when the source size is known to be large — e.g., the lex
38    /// driver might call `Arena::with_capacity(source.len() / 4)` to
39    /// avoid early growth allocations on a multi-MB document.
40    #[must_use]
41    pub fn with_capacity(capacity: usize) -> Self {
42        Self {
43            bump: Bump::with_capacity(capacity),
44        }
45    }
46
47    /// Allocate `value` in the arena, returning a borrowed reference
48    /// that is valid for `&self`'s lifetime.
49    pub fn alloc<T>(&self, value: T) -> &T {
50        self.bump.alloc(value)
51    }
52
53    /// Allocate a copy of `s` in the arena. Used when the lex layer
54    /// produces a new (synthesised or rewritten) string that does not
55    /// directly point into the source buffer.
56    pub fn alloc_str(&self, s: &str) -> &str {
57        self.bump.alloc_str(s)
58    }
59
60    /// Allocate a slice copy of `slice` in the arena. Restricted to
61    /// `Copy` types because the borrowed AST contains only `Copy`
62    /// data (refs, primitives, `Copy` enums) — see the `borrowed`
63    /// module docs.
64    pub fn alloc_slice_copy<T: Copy>(&self, slice: &[T]) -> &[T] {
65        self.bump.alloc_slice_copy(slice)
66    }
67
68    /// Allocate a slice from an iterator. Useful for assembling a
69    /// `Content::Segments` payload from a builder loop.
70    pub fn alloc_slice_fill_iter<T, I>(&self, iter: I) -> &[T]
71    where
72        I: IntoIterator<Item = T>,
73        I::IntoIter: ExactSizeIterator,
74    {
75        self.bump.alloc_slice_fill_iter(iter)
76    }
77
78    /// Bytes currently allocated to chunks (committed memory). For
79    /// diagnostic / benchmarking use only.
80    #[must_use]
81    pub fn allocated_bytes(&self) -> usize {
82        self.bump.allocated_bytes()
83    }
84
85    /// Borrow the inner [`Bump`] allocator. Used by structures that
86    /// need to hold their own arena-backed storage (e.g. the
87    /// [`super::Interner`]'s probe table, which is itself a
88    /// `BumpVec` allocated inside the arena).
89    #[must_use]
90    pub fn bump(&self) -> &Bump {
91        &self.bump
92    }
93
94    /// Drop every allocation without releasing the underlying chunks.
95    /// The next `alloc*` call reuses the same memory pages — saving
96    /// the `mmap` syscall a fresh [`Arena::new`] would pay.
97    ///
98    /// `&mut self` enforces at compile time that no live borrow into
99    /// the arena exists at reset time: every `alloc`-returned `&T`
100    /// borrows from `&self`, so a caller holding such a reference
101    /// can never simultaneously call `&mut self`. Trying to do so is
102    /// a borrow-checker error, not a runtime UAF.
103    ///
104    /// Used by long-running workers (rayon parallel corpus sweep, the
105    /// LSP daemon, etc.) that parse many documents in succession and
106    /// would otherwise pay one `mmap` per parse.
107    pub fn reset(&mut self) {
108        self.bump.reset();
109    }
110
111    /// Reset and pre-size: drop every allocation, then ensure the
112    /// retained chunk capacity is at least `target_capacity` bytes
113    /// before returning.
114    ///
115    /// Behaviour:
116    /// - When the arena's existing chunk capacity already meets the
117    ///   target, this is identical to [`Arena::reset`] — no syscall,
118    ///   no fresh allocation.
119    /// - When the target exceeds current capacity, the underlying
120    ///   bump is replaced with a freshly-allocated one of at least
121    ///   `target_capacity` bytes. The previous chunks are released
122    ///   to the system allocator; the new bump mmaps one chunk at
123    ///   the requested size.
124    ///
125    /// The replace path costs one `mmap` per *growth event*, not per
126    /// parse. Steady-state workloads (corpus sweep on similar-sized
127    /// docs) hit the no-op fast path after the first parse on each
128    /// worker thread; only docs whose AST exceeds the high-water mark
129    /// pay the syscall. Compared to plain [`Arena::reset`] +
130    /// chunk-grow-on-demand, the cost is identical (same number of
131    /// mmaps) but moved out of the parse hot path: the syscall fires
132    /// before `lex_into_arena` rather than inside it, removing one
133    /// source of intra-parse latency variance.
134    ///
135    /// Used by long-running workers (rayon corpus sweep, LSP daemon)
136    /// that have a per-source size hint available — typically
137    /// `source.len() * 4` for the borrowed AST shape.
138    pub fn reset_with_hint(&mut self, target_capacity: usize) {
139        if self.bump.allocated_bytes() >= target_capacity {
140            self.bump.reset();
141        } else {
142            self.bump = Bump::with_capacity(target_capacity);
143        }
144    }
145}
146
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn new_arena_is_empty() {
        let arena = Arena::new();
        // We cannot pin an exact byte count without coupling to
        // bumpalo's internal chunk sizing, so this only exercises the
        // accessor and checks that it doesn't panic.
        let _ = arena.allocated_bytes();
    }

    #[test]
    fn alloc_returns_reference_with_arena_lifetime() {
        let arena = Arena::new();
        let answer: &u32 = arena.alloc(42u32);
        assert_eq!(*answer, 42);
    }

    #[test]
    fn alloc_str_copies_into_arena() {
        let arena = Arena::new();
        assert_eq!(arena.alloc_str("hello"), "hello");
    }

    #[test]
    fn alloc_slice_copy_preserves_contents() {
        let arena = Arena::new();
        let copied = arena.alloc_slice_copy(&[1u32, 2, 3, 4, 5]);
        assert_eq!(copied, &[1, 2, 3, 4, 5]);
    }

    #[test]
    fn alloc_slice_fill_iter_handles_known_length() {
        let arena = Arena::new();
        let filled = arena.alloc_slice_fill_iter([10u32, 20, 30]);
        assert_eq!(filled, &[10, 20, 30]);
    }

    #[test]
    fn with_capacity_preallocates_some_chunk() {
        // The exact accounting is a bumpalo internal; we only verify
        // the pre-sized arena reports a non-zero committed capacity.
        assert!(Arena::with_capacity(4096).allocated_bytes() > 0);
    }

    #[test]
    fn many_small_allocations_share_arena() {
        let arena = Arena::new();
        // Allocate 1k tiny values; all references must coexist (no
        // aliasing, no drop) and read back their original contents.
        let refs: Vec<&u32> = (0..1000u32).map(|v| arena.alloc(v)).collect();
        for (expected, got) in (0..1000u32).zip(refs) {
            assert_eq!(*got, expected);
        }
    }

    #[test]
    fn reset_with_hint_grows_when_target_exceeds_current_capacity() {
        let mut arena = Arena::with_capacity(4096);
        // Ask for 10× the current capacity (at least 64 KiB): the
        // bump must be replaced by one large enough for the hint.
        let target = arena.allocated_bytes().saturating_mul(10).max(64 * 1024);
        arena.reset_with_hint(target);
        let after = arena.allocated_bytes();
        assert!(
            after >= target,
            "capacity must grow to at least target (target={target}, after={after})"
        );

        // Arena remains usable after the grow.
        let seven: &u32 = arena.alloc(7u32);
        assert_eq!(*seven, 7);
    }

    #[test]
    fn reset_with_hint_is_a_plain_reset_when_target_already_met() {
        // Pre-size large, then hint small. The arena must not shrink:
        // bumpalo's reset retains the chunk, and the hint path takes
        // the same plain-reset branch when capacity already suffices.
        let mut arena = Arena::with_capacity(64 * 1024);
        for i in 0..256u32 {
            let _ = arena.alloc(i);
        }
        let before = arena.allocated_bytes();
        arena.reset_with_hint(1024);
        let after = arena.allocated_bytes();
        assert_eq!(after, before, "small-target hint must not shrink the arena");
    }

    #[test]
    fn reset_drops_allocations_but_keeps_capacity() {
        let mut arena = Arena::with_capacity(4096);
        // Fill enough bytes that bumpalo definitely opens its first
        // chunk. The references are discarded; `&mut self` on `reset`
        // guarantees none can outlive the reset anyway.
        for i in 0..256u32 {
            let _ = arena.alloc(i);
            let _ = arena.alloc_str("filler");
        }
        let before = arena.allocated_bytes();
        assert!(before > 0, "fill loop must have allocated something");

        arena.reset();

        // Reset retains previously-allocated chunk memory so the next
        // parse avoids a fresh allocation. We verify "no drastic
        // shrink" rather than an exact size, since bumpalo internals
        // may shift the accounting on reset.
        let after = arena.allocated_bytes();
        assert!(
            after >= before / 2,
            "reset should retain at least half the previous capacity (before={before}, after={after})"
        );

        // Arena is reusable: a fresh allocation works and returns a
        // valid reference.
        let ninety_nine: &u32 = arena.alloc(99u32);
        assert_eq!(*ninety_nine, 99);
    }
}