Skip to main content

aozora_syntax/
alloc.rs

1//! Arena-backed AST construction.
2//!
3//! [`BorrowedAllocator<'a>`] is the sole AST builder for the
4//! [`crate::borrowed`] AST. It owns an [`Interner`] so byte-equal
5//! strings (ruby readings, container labels, kaeriten marks) share a
6//! single arena allocation.
7//!
8//! ## Naming convention
9//!
10//! - `make_*` methods build *payload* references (`&'a Gaiji<'a>`,
11//!   `&'a Annotation<'a>`) without wrapping them in a node.
12//! - Variant-named methods (`ruby`, `bouten`, `gaiji`, …) build the
13//!   final [`borrowed::AozoraNode<'a>`]. The `gaiji` and `annotation`
14//!   node constructors take the payload reference (built via
15//!   `make_gaiji` / `make_annotation`) so a payload can be shared
16//!   between a `Segment` and a `Node` without recomputing the string
17//!   interns.
18//! - `seg_*` methods build segment elements for `content_segments`.
19//!
20//! ## Canonicalisation
21//!
22//! Both `content_plain("")` and `content_segments(&[])` return
23//! [`borrowed::Content::EMPTY`] (i.e. `Segments(&[])`). `content_segments`
24//! collapses an all-`Text` input into a single concatenated `Plain`
25//! (the concatenation is interned). The legacy owned `Content::from`
26//! / `Content::from_segments` helpers used the same canonicalisation;
27//! preserving it keeps the determinism + sentinel-alignment
28//! proptests in `aozora-lex/tests/property_borrowed_arena.rs` honest
29//! across edits.
30
31use aozora_encoding::gaiji::Resolved;
32
33use crate::borrowed::{self, Arena, Interner};
34use crate::{
35    AlignEnd, AnnotationKind, AozoraHeadingKind, BoutenKind, BoutenPosition, Container, Indent,
36    Keigakomi, SectionKind,
37};
38
39/// Arena-backed builder for [`borrowed::AozoraNode<'a>`] and its
40/// payload types.
41///
42/// Owns an [`Interner`] keyed off the supplied [`Arena`]; both string
43/// content and per-variant payloads land in the arena, so dropping the
44/// arena tears the entire AST down in one step (no per-node `Drop`
45/// runs, no `Box::drop` traffic).
46#[derive(Debug)]
47pub struct BorrowedAllocator<'a> {
48    arena: &'a Arena,
49    interner: Interner<'a>,
50}
51
52#[allow(
53    clippy::unused_self,
54    reason = "API consistency: every BorrowedAllocator builder method takes &mut self even when the variant is a pure wrapper, so call sites have a uniform shape (alloc.method(...) for every variant). Switching trivial wrappers to free fns would split the API in half."
55)]
56impl<'a> BorrowedAllocator<'a> {
57    /// New allocator with a fresh interner sized to `interner_capacity`.
58    /// Capacity is rounded up to the next power of two by the interner.
59    #[must_use]
60    pub fn with_capacity(arena: &'a Arena, interner_capacity: usize) -> Self {
61        Self {
62            arena,
63            interner: Interner::with_capacity_in(interner_capacity, arena),
64        }
65    }
66
67    /// Construct with the interner's default initial capacity (64 → 64
68    /// after power-of-two rounding).
69    #[must_use]
70    pub fn new(arena: &'a Arena) -> Self {
71        Self::with_capacity(arena, 64)
72    }
73
74    /// Borrow the underlying arena. Useful for callers that need to
75    /// emit an arena-allocated normalised text alongside the AST.
76    #[must_use]
77    pub fn arena(&self) -> &'a Arena {
78        self.arena
79    }
80
81    /// Finish allocation and return the interner so the caller can
82    /// inspect its dedup counters (cache hits, table hits, allocs,
83    /// average probe length). The interner's `&'a` arena reference
84    /// continues to keep the interned strings alive.
85    #[must_use]
86    pub fn into_interner(self) -> Interner<'a> {
87        self.interner
88    }
89
90    // ---------------------------------------------------------------------
91    // Content / segment builders
92    // ---------------------------------------------------------------------
93
94    /// Build a plain-text body content. Empty input canonicalises to
95    /// `Segments(&[])` (the legacy owned shape did the same).
96    pub fn content_plain(&mut self, s: &str) -> borrowed::Content<'a> {
97        if s.is_empty() {
98            borrowed::Content::EMPTY
99        } else {
100            borrowed::Content::Plain(self.interner.intern(s))
101        }
102    }
103
104    /// Build a body content from a sequence of segments. Empty input →
105    /// `Segments(&[])`; all-`Text` input collapses into a single
106    /// concatenated `Plain` (interned).
107    pub fn content_segments(&mut self, segs: &[borrowed::Segment<'a>]) -> borrowed::Content<'a> {
108        if segs.is_empty() {
109            return borrowed::Content::EMPTY;
110        }
111        if segs.iter().all(|s| matches!(s, borrowed::Segment::Text(_))) {
112            // Total length is known (sum of text lengths) so we can
113            // pre-size the buffer and avoid reallocation.
114            let total: usize = segs
115                .iter()
116                .map(|s| match s {
117                    borrowed::Segment::Text(t) => t.len(),
118                    _ => 0,
119                })
120                .sum();
121            let mut buf = String::with_capacity(total);
122            for s in segs {
123                if let borrowed::Segment::Text(t) = s {
124                    buf.push_str(t);
125                }
126            }
127            return borrowed::Content::Plain(self.interner.intern(&buf));
128        }
129        borrowed::Content::Segments(self.arena.alloc_slice_copy(segs))
130    }
131
132    /// `Segment::Text(s)` — interns the string.
133    pub fn seg_text(&mut self, s: &str) -> borrowed::Segment<'a> {
134        borrowed::Segment::Text(self.interner.intern(s))
135    }
136
137    /// `Segment::Gaiji(g)` — wraps a payload built via [`Self::make_gaiji`].
138    #[must_use]
139    pub fn seg_gaiji(&self, g: &'a borrowed::Gaiji<'a>) -> borrowed::Segment<'a> {
140        borrowed::Segment::Gaiji(g)
141    }
142
143    /// `Segment::Annotation(a)` — wraps a payload built via [`Self::make_annotation`].
144    #[must_use]
145    pub fn seg_annotation(&self, a: &'a borrowed::Annotation<'a>) -> borrowed::Segment<'a> {
146        borrowed::Segment::Annotation(a)
147    }
148
149    // ---------------------------------------------------------------------
150    // Payload builders (used by both Segment and Node constructors)
151    // ---------------------------------------------------------------------
152
153    /// Build a `Gaiji` payload. Use [`Self::seg_gaiji`] to wrap as a
154    /// segment, or [`Self::gaiji`] to wrap as a node.
155    pub fn make_gaiji(
156        &mut self,
157        description: &str,
158        ucs: Option<Resolved>,
159        mencode: Option<&str>,
160    ) -> &'a borrowed::Gaiji<'a> {
161        let g = borrowed::Gaiji {
162            description: self.interner.intern(description),
163            ucs,
164            mencode: mencode.map(|s| self.interner.intern(s)),
165        };
166        self.arena.alloc(g)
167    }
168
169    /// Build an `Annotation` payload. Use [`Self::seg_annotation`] to
170    /// wrap as a segment, or [`Self::annotation`] to wrap as a node.
171    ///
172    /// `raw` carries the [`borrowed::NonEmptyStr`] invariant.
173    ///
174    /// # Panics
175    ///
176    /// Panics if `raw` is empty. Phase 3 emits annotation only after
177    /// at least one byte landed in the bracket body.
178    pub fn make_annotation(
179        &mut self,
180        raw: &str,
181        kind: AnnotationKind,
182    ) -> &'a borrowed::Annotation<'a> {
183        let raw = borrowed::NonEmptyStr::new(self.interner.intern(raw))
184            .expect("Phase 3 must emit Annotation with non-empty raw bytes");
185        let a = borrowed::Annotation { raw, kind };
186        self.arena.alloc(a)
187    }
188
189    // ---------------------------------------------------------------------
190    // Node variant constructors (17 — matches the AozoraNode enum)
191    // ---------------------------------------------------------------------
192
193    /// `AozoraNode::Ruby(Ruby { base, reading, delim_explicit })`.
194    ///
195    /// `base` and `reading` carry the [`borrowed::NonEmpty`]
196    /// invariant. Phase 3 only emits Ruby once both are non-empty,
197    /// so this `expect` is a contract-check; an empty payload here
198    /// signals a classifier bug.
199    ///
200    /// # Panics
201    ///
202    /// Panics if `base` or `reading` is empty. Phase 3 emit-sites
203    /// classify only after the body is populated, so the panic
204    /// represents a pipeline-internal bug — the
205    /// [`borrowed::NonEmpty`] payload encodes this invariant at the
206    /// type level.
207    #[must_use]
208    pub fn ruby(
209        &self,
210        base: borrowed::Content<'a>,
211        reading: borrowed::Content<'a>,
212        delim_explicit: bool,
213    ) -> borrowed::AozoraNode<'a> {
214        let base =
215            borrowed::NonEmpty::new(base).expect("Phase 3 must emit Ruby with non-empty base");
216        let reading = borrowed::NonEmpty::new(reading)
217            .expect("Phase 3 must emit Ruby with non-empty reading");
218        borrowed::AozoraNode::Ruby(self.arena.alloc(borrowed::Ruby {
219            base,
220            reading,
221            delim_explicit,
222        }))
223    }
224
225    /// `AozoraNode::Bouten(Bouten { kind, target, position })`.
226    ///
227    /// `target` carries the [`borrowed::NonEmpty`] invariant —
228    /// Phase 3 resolves the forward reference before emitting.
229    ///
230    /// # Panics
231    ///
232    /// Panics if `target` is empty. The forward-reference resolver
233    /// in Phase 3 always lands a non-empty target before emit; an
234    /// empty payload here signals a classifier bug.
235    #[must_use]
236    pub fn bouten(
237        &self,
238        kind: BoutenKind,
239        target: borrowed::Content<'a>,
240        position: BoutenPosition,
241    ) -> borrowed::AozoraNode<'a> {
242        let target = borrowed::NonEmpty::new(target)
243            .expect("Phase 3 must emit Bouten with a resolved non-empty target");
244        borrowed::AozoraNode::Bouten(self.arena.alloc(borrowed::Bouten {
245            kind,
246            target,
247            position,
248        }))
249    }
250
251    /// `AozoraNode::TateChuYoko(TateChuYoko { text })`.
252    ///
253    /// `text` carries the [`borrowed::NonEmpty`] invariant.
254    ///
255    /// # Panics
256    ///
257    /// Panics if `text` is empty.
258    #[must_use]
259    pub fn tate_chu_yoko(&self, text: borrowed::Content<'a>) -> borrowed::AozoraNode<'a> {
260        let text = borrowed::NonEmpty::new(text)
261            .expect("Phase 3 must emit TateChuYoko with non-empty text");
262        borrowed::AozoraNode::TateChuYoko(self.arena.alloc(borrowed::TateChuYoko { text }))
263    }
264
265    /// `AozoraNode::Gaiji(g)`.
266    #[must_use]
267    pub fn gaiji(&self, g: &'a borrowed::Gaiji<'a>) -> borrowed::AozoraNode<'a> {
268        borrowed::AozoraNode::Gaiji(g)
269    }
270
271    /// `AozoraNode::Indent(i)`.
272    #[must_use]
273    pub fn indent(&self, i: Indent) -> borrowed::AozoraNode<'a> {
274        borrowed::AozoraNode::Indent(i)
275    }
276
277    /// `AozoraNode::AlignEnd(a)`.
278    #[must_use]
279    pub fn align_end(&self, a: AlignEnd) -> borrowed::AozoraNode<'a> {
280        borrowed::AozoraNode::AlignEnd(a)
281    }
282
283    /// `AozoraNode::Warichu(Warichu { upper, lower })`.
284    #[must_use]
285    pub fn warichu(
286        &self,
287        upper: borrowed::Content<'a>,
288        lower: borrowed::Content<'a>,
289    ) -> borrowed::AozoraNode<'a> {
290        borrowed::AozoraNode::Warichu(self.arena.alloc(borrowed::Warichu { upper, lower }))
291    }
292
293    /// `AozoraNode::Keigakomi(k)`.
294    #[must_use]
295    pub fn keigakomi(&self, k: Keigakomi) -> borrowed::AozoraNode<'a> {
296        borrowed::AozoraNode::Keigakomi(k)
297    }
298
299    /// `AozoraNode::PageBreak`.
300    #[must_use]
301    pub fn page_break(&self) -> borrowed::AozoraNode<'a> {
302        borrowed::AozoraNode::PageBreak
303    }
304
305    /// `AozoraNode::SectionBreak(k)`.
306    #[must_use]
307    pub fn section_break(&self, k: SectionKind) -> borrowed::AozoraNode<'a> {
308        borrowed::AozoraNode::SectionBreak(k)
309    }
310
311    /// `AozoraNode::AozoraHeading(AozoraHeading { kind, text })`.
312    ///
313    /// `text` carries the [`borrowed::NonEmpty`] invariant.
314    ///
315    /// # Panics
316    ///
317    /// Panics if `text` is empty.
318    #[must_use]
319    pub fn aozora_heading(
320        &self,
321        kind: AozoraHeadingKind,
322        text: borrowed::Content<'a>,
323    ) -> borrowed::AozoraNode<'a> {
324        let text = borrowed::NonEmpty::new(text)
325            .expect("Phase 3 must emit AozoraHeading with non-empty text");
326        borrowed::AozoraNode::AozoraHeading(
327            self.arena.alloc(borrowed::AozoraHeading { kind, text }),
328        )
329    }
330
331    /// `AozoraNode::HeadingHint(HeadingHint { level, target })`.
332    ///
333    /// `target` carries the [`borrowed::NonEmptyStr`] invariant.
334    ///
335    /// # Panics
336    ///
337    /// Panics if `target` is empty. Phase 3 emits the hint only
338    /// after the forward-reference target lands non-empty; an empty
339    /// payload here signals a classifier bug.
340    pub fn heading_hint(&mut self, level: u8, target: &str) -> borrowed::AozoraNode<'a> {
341        let target = borrowed::NonEmptyStr::new(self.interner.intern(target))
342            .expect("Phase 3 must emit HeadingHint with non-empty target");
343        borrowed::AozoraNode::HeadingHint(self.arena.alloc(borrowed::HeadingHint { level, target }))
344    }
345
346    /// `AozoraNode::Sashie(Sashie { file, caption })`.
347    ///
348    /// `file` carries the [`borrowed::NonEmptyStr`] invariant.
349    ///
350    /// # Panics
351    ///
352    /// Panics if `file` is empty.
353    pub fn sashie(
354        &mut self,
355        file: &str,
356        caption: Option<borrowed::Content<'a>>,
357    ) -> borrowed::AozoraNode<'a> {
358        let file = borrowed::NonEmptyStr::new(self.interner.intern(file))
359            .expect("Phase 3 must emit Sashie with non-empty file path");
360        borrowed::AozoraNode::Sashie(self.arena.alloc(borrowed::Sashie { file, caption }))
361    }
362
363    /// `AozoraNode::Kaeriten(Kaeriten { mark })`.
364    ///
365    /// `mark` carries the [`borrowed::NonEmptyStr`] invariant.
366    ///
367    /// # Panics
368    ///
369    /// Panics if `mark` is empty.
370    pub fn kaeriten(&mut self, mark: &str) -> borrowed::AozoraNode<'a> {
371        let mark = borrowed::NonEmptyStr::new(self.interner.intern(mark))
372            .expect("Phase 3 must emit Kaeriten with non-empty mark");
373        borrowed::AozoraNode::Kaeriten(self.arena.alloc(borrowed::Kaeriten { mark }))
374    }
375
376    /// `AozoraNode::Annotation(a)`.
377    #[must_use]
378    pub fn annotation(&self, a: &'a borrowed::Annotation<'a>) -> borrowed::AozoraNode<'a> {
379        borrowed::AozoraNode::Annotation(a)
380    }
381
382    /// `AozoraNode::DoubleRuby(DoubleRuby { content })`.
383    ///
384    /// `content` carries the [`borrowed::NonEmpty`] invariant — Phase 3
385    /// pre-filters `《《》》` with empty body into plain text so this
386    /// path is never reached with an empty payload.
387    ///
388    /// # Panics
389    ///
390    /// Panics if `content` is empty. Phase 3's pre-filter is the
391    /// gate; an empty payload here signals a classifier bug.
392    #[must_use]
393    pub fn double_ruby(&self, content: borrowed::Content<'a>) -> borrowed::AozoraNode<'a> {
394        let content = borrowed::NonEmpty::new(content)
395            .expect("Phase 3 pre-filters empty DoubleRuby into plain");
396        borrowed::AozoraNode::DoubleRuby(self.arena.alloc(borrowed::DoubleRuby { content }))
397    }
398
399    /// `AozoraNode::Container(c)`.
400    #[must_use]
401    pub fn container(&self, c: Container) -> borrowed::AozoraNode<'a> {
402        borrowed::AozoraNode::Container(c)
403    }
404}
405
#[cfg(test)]
mod tests {
    //! Round-trip coverage for `BorrowedAllocator`.
    //!
    //! Every test builds one value through the allocator and checks
    //! that the fields read back match the arguments supplied. The
    //! suite touches all 17 node variants, the content / segment
    //! canonicalisation rules, and interner deduplication.

    use core::ptr;

    use super::*;
    use crate::borrowed;
    use crate::{
        AlignEnd, AnnotationKind, AozoraHeadingKind, BoutenKind, BoutenPosition, Container,
        ContainerKind, Indent, Keigakomi, SectionKind,
    };

    /// Allocator with the default interner capacity over `arena`.
    fn fresh_alloc(arena: &Arena) -> BorrowedAllocator<'_> {
        BorrowedAllocator::new(arena)
    }

    #[test]
    fn ruby_round_trip() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let base = alloc.content_plain("青梅");
        let reading = alloc.content_plain("おうめ");
        let node = alloc.ruby(base, reading, true);
        let borrowed::AozoraNode::Ruby(ruby) = node else {
            panic!("expected Ruby, got {other:?}", other = node);
        };
        assert_eq!(ruby.base.as_plain(), Some("青梅"));
        assert_eq!(ruby.reading.as_plain(), Some("おうめ"));
        assert!(ruby.delim_explicit);
    }

    #[test]
    fn bouten_round_trip() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let target = alloc.content_plain("青空");
        let node = alloc.bouten(BoutenKind::Goma, target, BoutenPosition::Right);
        let borrowed::AozoraNode::Bouten(bouten) = node else {
            panic!("expected Bouten, got {other:?}", other = node);
        };
        assert_eq!(bouten.kind, BoutenKind::Goma);
        assert_eq!(bouten.target.as_plain(), Some("青空"));
        assert_eq!(bouten.position, BoutenPosition::Right);
    }

    #[test]
    fn tate_chu_yoko_round_trip() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let text = alloc.content_plain("12");
        let node = alloc.tate_chu_yoko(text);
        let borrowed::AozoraNode::TateChuYoko(tcy) = node else {
            panic!("expected TateChuYoko, got {other:?}", other = node);
        };
        assert_eq!(tcy.text.as_plain(), Some("12"));
    }

    #[test]
    fn gaiji_full_metadata() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let payload = alloc.make_gaiji(
            "木+吶のつくり",
            Some(Resolved::Char('𠀋')),
            Some("第3水準1-85-54"),
        );
        let node = alloc.gaiji(payload);
        let borrowed::AozoraNode::Gaiji(gaiji) = node else {
            panic!("expected Gaiji, got {other:?}", other = node);
        };
        assert_eq!(gaiji.description, "木+吶のつくり");
        assert_eq!(gaiji.ucs, Some(Resolved::Char('𠀋')));
        assert_eq!(gaiji.mencode, Some("第3水準1-85-54"));
    }

    #[test]
    fn gaiji_no_mencode() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let payload = alloc.make_gaiji("desc", None, None);
        let node = alloc.gaiji(payload);
        let borrowed::AozoraNode::Gaiji(gaiji) = node else {
            panic!("expected Gaiji, got {other:?}", other = node);
        };
        assert_eq!(gaiji.description, "desc");
        assert!(gaiji.ucs.is_none());
        assert!(gaiji.mencode.is_none());
    }

    #[test]
    fn indent_round_trip() {
        let arena = Arena::new();
        let alloc = fresh_alloc(&arena);
        let node = alloc.indent(Indent { amount: 3 });
        let is_expected = matches!(node, borrowed::AozoraNode::Indent(Indent { amount: 3 }));
        assert!(is_expected);
    }

    #[test]
    fn align_end_round_trip() {
        let arena = Arena::new();
        let alloc = fresh_alloc(&arena);
        let node = alloc.align_end(AlignEnd { offset: 2 });
        let is_expected = matches!(
            node,
            borrowed::AozoraNode::AlignEnd(AlignEnd { offset: 2 })
        );
        assert!(is_expected);
    }

    #[test]
    fn warichu_round_trip() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let upper = alloc.content_plain("上");
        let lower = alloc.content_plain("下");
        let node = alloc.warichu(upper, lower);
        let borrowed::AozoraNode::Warichu(warichu) = node else {
            panic!("expected Warichu, got {other:?}", other = node);
        };
        assert_eq!(warichu.upper.as_plain(), Some("上"));
        assert_eq!(warichu.lower.as_plain(), Some("下"));
    }

    #[test]
    fn keigakomi_round_trip() {
        let arena = Arena::new();
        let alloc = fresh_alloc(&arena);
        let node = alloc.keigakomi(Keigakomi);
        let is_expected = matches!(node, borrowed::AozoraNode::Keigakomi(Keigakomi));
        assert!(is_expected);
    }

    #[test]
    fn page_break_round_trip() {
        let arena = Arena::new();
        let alloc = fresh_alloc(&arena);
        let node = alloc.page_break();
        let is_expected = matches!(node, borrowed::AozoraNode::PageBreak);
        assert!(is_expected);
    }

    #[test]
    fn section_break_round_trip() {
        let arena = Arena::new();
        let alloc = fresh_alloc(&arena);
        let node = alloc.section_break(SectionKind::Choho);
        let is_expected = matches!(
            node,
            borrowed::AozoraNode::SectionBreak(SectionKind::Choho)
        );
        assert!(is_expected);
    }

    #[test]
    fn aozora_heading_round_trip() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let text = alloc.content_plain("見出し");
        let node = alloc.aozora_heading(AozoraHeadingKind::Window, text);
        let borrowed::AozoraNode::AozoraHeading(heading) = node else {
            panic!("expected AozoraHeading, got {other:?}", other = node);
        };
        assert_eq!(heading.kind, AozoraHeadingKind::Window);
        assert_eq!(heading.text.as_plain(), Some("見出し"));
    }

    #[test]
    fn heading_hint_round_trip() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let node = alloc.heading_hint(2, "対象");
        let borrowed::AozoraNode::HeadingHint(hint) = node else {
            panic!("expected HeadingHint, got {other:?}", other = node);
        };
        assert_eq!(hint.level, 2);
        assert_eq!(hint.target.as_str(), "対象");
    }

    #[test]
    fn sashie_with_caption() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let caption = alloc.content_plain("挿絵キャプション");
        let node = alloc.sashie("fig01.png", Some(caption));
        let borrowed::AozoraNode::Sashie(sashie) = node else {
            panic!("expected Sashie, got {other:?}", other = node);
        };
        assert_eq!(sashie.file.as_str(), "fig01.png");
        let caption_text = sashie.caption.and_then(borrowed::Content::as_plain);
        assert_eq!(caption_text, Some("挿絵キャプション"));
    }

    #[test]
    fn sashie_without_caption() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let node = alloc.sashie("fig02.png", None);
        let borrowed::AozoraNode::Sashie(sashie) = node else {
            panic!("expected Sashie, got {other:?}", other = node);
        };
        assert_eq!(sashie.file.as_str(), "fig02.png");
        assert!(sashie.caption.is_none());
    }

    #[test]
    fn kaeriten_round_trip() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let node = alloc.kaeriten("一");
        let borrowed::AozoraNode::Kaeriten(kaeriten) = node else {
            panic!("expected Kaeriten, got {other:?}", other = node);
        };
        assert_eq!(kaeriten.mark.as_str(), "一");
    }

    #[test]
    fn annotation_round_trip() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let payload = alloc.make_annotation("[#X]", AnnotationKind::Unknown);
        let node = alloc.annotation(payload);
        let borrowed::AozoraNode::Annotation(annotation) = node else {
            panic!("expected Annotation, got {other:?}", other = node);
        };
        assert_eq!(annotation.raw.as_str(), "[#X]");
        assert_eq!(annotation.kind, AnnotationKind::Unknown);
    }

    #[test]
    fn double_ruby_round_trip() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let content = alloc.content_plain("重要");
        let node = alloc.double_ruby(content);
        let borrowed::AozoraNode::DoubleRuby(double) = node else {
            panic!("expected DoubleRuby, got {other:?}", other = node);
        };
        assert_eq!(double.content.as_plain(), Some("重要"));
    }

    #[test]
    fn container_round_trip() {
        let arena = Arena::new();
        let alloc = fresh_alloc(&arena);
        let expected = Container {
            kind: ContainerKind::Indent { amount: 1 },
        };
        let node = alloc.container(expected);
        let is_expected =
            matches!(node, borrowed::AozoraNode::Container(got) if got == expected);
        assert!(is_expected);
    }

    // ---------------------------------------------------------------------
    // Content / segment composition (canonicalisation rules)
    // ---------------------------------------------------------------------

    #[test]
    fn content_plain_empty_collapses_to_empty_segments() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let content = alloc.content_plain("");
        let is_empty_segments =
            matches!(content, borrowed::Content::Segments(segs) if segs.is_empty());
        assert!(is_empty_segments);
    }

    #[test]
    fn content_plain_nonempty_returns_plain_variant() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let content = alloc.content_plain("hello");
        assert_eq!(content.as_plain(), Some("hello"));
    }

    #[test]
    fn content_segments_preserves_order_and_kind() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        // Build order is kept as-is so the interner sees the strings in
        // the same sequence.
        let gaiji = alloc.make_gaiji("X", None, None);
        let gaiji_seg = alloc.seg_gaiji(gaiji);
        let before_seg = alloc.seg_text("before ");
        let after_seg = alloc.seg_text(" after");
        let annotation = alloc.make_annotation("[#X]", AnnotationKind::Unknown);
        let annotation_seg = alloc.seg_annotation(annotation);
        let content = alloc.content_segments(&[before_seg, gaiji_seg, after_seg, annotation_seg]);
        let segs = match content {
            borrowed::Content::Segments(segs) => segs,
            _ => panic!("expected Segments variant for mixed-kind input"),
        };
        assert_eq!(segs.len(), 4);
        assert!(matches!(&segs[0], borrowed::Segment::Text(t) if *t == "before "));
        assert!(matches!(&segs[1], borrowed::Segment::Gaiji(_)));
        assert!(matches!(&segs[2], borrowed::Segment::Text(t) if *t == " after"));
        assert!(matches!(&segs[3], borrowed::Segment::Annotation(_)));
    }

    #[test]
    fn content_segments_all_text_collapses_to_plain() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let first = alloc.seg_text("hi ");
        let second = alloc.seg_text("there");
        let content = alloc.content_segments(&[first, second]);
        assert_eq!(content.as_plain(), Some("hi there"));
    }

    #[test]
    fn content_segments_empty_collapses_to_empty_segments() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let content = alloc.content_segments(&[]);
        let is_empty_segments =
            matches!(content, borrowed::Content::Segments(segs) if segs.is_empty());
        assert!(is_empty_segments);
    }

    // ---------------------------------------------------------------------
    // Interner is wired up — repeated short strings share a single
    // arena slot.
    // ---------------------------------------------------------------------

    #[test]
    fn interner_dedups_repeated_readings() {
        let arena = Arena::new();
        let mut alloc = fresh_alloc(&arena);
        let first_base = alloc.content_plain("青梅");
        let first_reading = alloc.content_plain("おうめ");
        let first_node = alloc.ruby(first_base, first_reading, false);
        let second_base = alloc.content_plain("青梅");
        let second_reading = alloc.content_plain("おうめ");
        let second_node = alloc.ruby(second_base, second_reading, false);
        let first_ruby = match first_node {
            borrowed::AozoraNode::Ruby(r) => r,
            _ => unreachable!(),
        };
        let second_ruby = match second_node {
            borrowed::AozoraNode::Ruby(r) => r,
            _ => unreachable!(),
        };
        let first_str = first_ruby.reading.as_plain().expect("plain");
        let second_str = second_ruby.reading.as_plain().expect("plain");
        // Pointer equality (not just content equality) proves both
        // readings resolve to the same interned allocation.
        assert_eq!(
            first_str.as_ptr(),
            second_str.as_ptr(),
            "interner must dedup repeated readings"
        );
    }

    #[test]
    fn arena_accessor_returns_construction_arena() {
        let arena = Arena::new();
        let alloc = fresh_alloc(&arena);
        let same_arena = ptr::eq(alloc.arena(), &raw const arena);
        assert!(same_arena);
    }
}