Skip to main content

aozora_syntax/
lib.rs

//! AST type definitions for the aozora parser.
//!
//! # AST shape
//!
//! The **sole AST** is the borrowed AST defined in [`borrowed`]:
//! arena-allocated, `Copy`-able, and deduplicated through
//! [`borrowed::Interner`]. Public consumers (the `aozora` meta crate,
//! FFI / WASM / Python drivers, CLI) parse via
//! `aozora::Document::parse()` and walk a `borrowed::AozoraNode<'_>`.
//!
//! # Top-level surface
//!
//! Apart from the [`Span`] and [`NodeKind`] re-exports and the
//! [`SyntaxError`] type, only the **shared `Copy`-able payloads**
//! referenced by the borrowed AST (`BoutenKind`, `BoutenPosition`,
//! `Indent`, `AlignEnd`, `Container`, `ContainerKind`, `Keigakomi`,
//! `SectionKind`, `AozoraHeadingKind`, `AnnotationKind`) live at the top
//! level. The borrowed-AST node types live under `borrowed::`. The
//! arena-backed builder lives under `alloc::`.
19
20#![forbid(unsafe_code)]
21
22use miette::Diagnostic;
23use thiserror::Error;
24
25pub mod accent;
26pub mod alloc;
27pub mod borrowed;
28mod extension;
29pub mod node_kind;
30
31pub use extension::ContainerKind;
32pub use node_kind::NodeKind;
33
34/// Byte-range span into the original source document.
35///
36/// Re-exported from [`aozora_spec::Span`] — see that module for the
37/// canonical definition.
38pub use aozora_spec::Span;
39
40/// Paired block container payload: carries only the kind descriptor.
41///
42/// Children live in the AST as the container node's children
43/// (the `post_process` paired-container splice reparents them).
44#[derive(Debug, Clone, Copy, PartialEq, Eq)]
45#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
46pub struct Container {
47    pub kind: ContainerKind,
48}
49
/// Mark style of a bouten (emphasis mark) run.
///
/// Each variant is named after the Aozora notation keyword quoted in its
/// doc comment. The enum is `#[non_exhaustive]`, so code outside this
/// crate must keep a wildcard arm when matching on it.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub enum BoutenKind {
    /// Sesame dot (ゴマ).
    Goma,
    /// White (hollow) sesame dot (白ゴマ).
    WhiteSesame,
    /// Circle (丸).
    Circle,
    /// White (hollow) circle (白丸).
    WhiteCircle,
    /// Double circle (二重丸).
    DoubleCircle,
    /// Bullseye, literally "snake's eye" (蛇の目).
    Janome,
    /// Cross mark (ばつ).
    Cross,
    /// White (hollow) triangle (白三角).
    WhiteTriangle,
    /// Wavy line (波線).
    WavyLine,
    /// Side line (傍線).
    UnderLine,
    /// Double side line (二重傍線).
    DoubleUnderLine,
}
77
/// Side of the vertically written base text on which bouten marks are placed.
///
/// `Right` is the `Default`.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub enum BoutenPosition {
    /// Marks sit on the right-hand side of the base text (the default).
    #[default]
    Right,
    /// Marks sit on the left-hand side of the base text.
    Left,
}
87
/// Indentation payload; one of the shared `Copy` payloads referenced by the
/// borrowed AST.
///
/// Derives `Hash` alongside `Eq` so values can serve as map/set keys, the
/// same as the `BoutenKind`, `BoutenPosition` and `AnnotationKind` payloads.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Indent {
    /// Indentation amount.
    ///
    /// NOTE(review): presumably a character count, like `AlignEnd::offset` —
    /// confirm against the parser.
    pub amount: u8,
}
93
/// End-alignment payload; one of the shared `Copy` payloads referenced by
/// the borrowed AST.
///
/// Derives `Hash` alongside `Eq` so values can serve as map/set keys, the
/// same as the `BoutenKind`, `BoutenPosition` and `AnnotationKind` payloads.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct AlignEnd {
    /// Offset in characters from the right edge.
    ///
    /// `0` is flush alignment (地付き); `n` is raised `n` characters from the
    /// edge (地から n 字上げ).
    pub offset: u8,
}
100
/// Field-less marker payload; one of the shared `Copy` payloads referenced
/// by the borrowed AST.
///
/// NOTE(review): the name presumably refers to the Aozora 罫囲み (ruled
/// frame) notation — confirm against the parser.
///
/// Being a unit struct it has exactly one value, so `Default` and `Hash`
/// are derived next to `Eq` to make it usable wherever the other payloads are.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Keigakomi;
104
/// Which break notation a section payload stands for.
///
/// Each variant quotes the Aozora notation it is named after. The enum is
/// `#[non_exhaustive]`, so code outside this crate must keep a wildcard arm
/// when matching on it.
///
/// Derives `Hash` alongside `Eq` so values can serve as map/set keys, the
/// same as the `BoutenKind`, `BoutenPosition` and `AnnotationKind` payloads.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub enum SectionKind {
    /// `[#改丁]` — leaf break.
    Choho,
    /// `[#改段]` — column break.
    Dan,
    /// `[#改見開き]` — spread break.
    Spread,
}
116
/// Which Aozora-specific heading notation a heading payload stands for.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must keep a
/// wildcard arm when matching on it.
///
/// Derives `Hash` alongside `Eq` so values can serve as map/set keys, the
/// same as the `BoutenKind`, `BoutenPosition` and `AnnotationKind` payloads.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[non_exhaustive]
pub enum AozoraHeadingKind {
    /// Window heading (窓見出し).
    Window,
    /// Sub-heading (副見出し).
    Sub,
}
126
/// Classification of an annotation payload.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must keep a
/// wildcard arm when matching on it.
#[non_exhaustive]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
pub enum AnnotationKind {
    /// Notation the parser saw as Aozora-shaped but that is not registered.
    Unknown,
    /// Editorial "as-is" marker in the style of `[#「X」はママ]`.
    AsIs,
    /// Note recording a divergence from the source text
    /// (`[#「X」は底本では「Y」]`).
    TextualNote,
    /// Ruby span that could not be parsed cleanly.
    InvalidRubySpan,
    /// Opener of an inline warichu — `[#割り注]`.
    WarichuOpen,
    /// Closer of an inline warichu — `[#割り注終わり]`.
    WarichuClose,
}
144
145/// Parse- and render-time error surface for `aozora-syntax` consumers.
146#[derive(Debug, Error, Diagnostic)]
147#[non_exhaustive]
148pub enum SyntaxError {
149    #[error("未知のノード種別です: {kind}")]
150    #[diagnostic(code(aozora::syntax::unknown_kind))]
151    UnknownKind { kind: Box<str> },
152}
153
#[cfg(test)]
mod tests {
    use super::*;

    /// A span built from two equal offsets reports as empty with length 0.
    #[test]
    fn empty_span_is_empty_and_zero_length() {
        let degenerate = Span::new(42, 42);
        assert!(degenerate.is_empty());
        assert_eq!(degenerate.len(), 0);
    }

    /// `Span::slice` yields the text the span covers in the given buffer.
    #[test]
    fn span_slices_source_buffer() {
        let text = "hello world";
        let trailing_word = Span::new(6, 11);
        assert_eq!(trailing_word.slice(text), "world");
    }

    /// `BoutenPosition::default()` is the `Right` variant.
    #[test]
    fn bouten_position_defaults_to_right() {
        let fallback = BoutenPosition::default();
        assert_eq!(fallback, BoutenPosition::Right);
    }
}