1use aozora_encoding::gaiji::Resolved;
44use aozora_pipeline::BorrowedLexOutput;
45use aozora_syntax::borrowed::{
46 Annotation as AozoraAnnotation, AozoraNode, Bouten as AozoraBouten, Content,
47 DoubleRuby as AozoraDoubleRuby, Gaiji as AozoraGaiji, HeadingHint, NodeRef, Ruby as AozoraRuby,
48 Segment, TateChuYoko,
49};
50use aozora_syntax::{AnnotationKind, BoutenKind, BoutenPosition, ContainerKind, SectionKind};
51use comrak::nodes::{
52 AstNode, ListType, NodeHeading, NodeList, NodeValue, Sourcepos, TableAlignment,
53};
54use serde::Serialize;
55
56use crate::sentinels::{
57 BlockSentinelKind, SentinelCursor, flatten_registry_in_source_order, for_each_text_descendant,
58 is_sentinel_char, paragraph_sole_block_sentinel,
59};
60
/// Narrows a `usize` to `u32`, saturating at `u32::MAX` when the value does not fit.
fn to_u32(n: usize) -> u32 {
    match u32::try_from(n) {
        Ok(narrow) => narrow,
        Err(_) => u32::MAX,
    }
}
66
/// Root of the serializable IR: the document's blocks plus its diagnostics.
#[derive(Debug, Default, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct IrDocument {
    /// Top-level blocks, in the order the walker produced them.
    pub blocks: Vec<IrBlock>,
    /// Diagnostics attached to the document. `build_ir` in this file always leaves this empty.
    pub diagnostics: Vec<IrDiagnostic>,
}
73
/// A block-level IR node.
///
/// Serialized as an internally tagged object: the variant name, in camelCase, is stored under
/// the `kind` key. Every `Option` field is omitted from the output when it is `None`.
///
/// `source_line` is filled in for blocks that come from top-level AST nodes; blocks nested
/// inside a blockquote or list item are walked with `top_level = false` and leave it unset.
///
/// NOTE(review): the enum-level `rename_all` renames the variant tags only. Struct-variant
/// fields such as `source_line` keep their Rust names unless `rename_all_fields` is also
/// set — confirm this matches what the consumer expects.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "kind", rename_all = "camelCase")]
pub enum IrBlock {
    /// A paragraph of inline content.
    Paragraph {
        children: Vec<IrInline>,
        #[serde(skip_serializing_if = "Option::is_none")]
        source_line: Option<u32>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// A heading; the walker clamps `level` to `1..=6`.
    Heading {
        level: u8,
        children: Vec<IrInline>,
        #[serde(skip_serializing_if = "Option::is_none")]
        source_line: Option<u32>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// A block quote wrapping nested blocks.
    Blockquote {
        children: Vec<Self>,
        #[serde(skip_serializing_if = "Option::is_none")]
        source_line: Option<u32>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// An ordered or bullet list.
    List {
        ordered: bool,
        /// Starting number; the walker only sets it when the list starts above 1.
        #[serde(skip_serializing_if = "Option::is_none")]
        start: Option<u32>,
        items: Vec<IrListItem>,
        #[serde(skip_serializing_if = "Option::is_none")]
        source_line: Option<u32>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// A code block with its literal text.
    CodeBlock {
        /// The block's info string; unset when that string is empty.
        #[serde(skip_serializing_if = "Option::is_none")]
        lang: Option<String>,
        value: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        source_line: Option<u32>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// A thematic break.
    ThematicBreak {
        #[serde(skip_serializing_if = "Option::is_none")]
        source_line: Option<u32>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// A table: `header` is the first row of the AST table, `rows` the remaining ones.
    Table {
        header: IrTableRow,
        rows: Vec<IrTableRow>,
        align: Vec<IrTableAlign>,
        #[serde(skip_serializing_if = "Option::is_none")]
        source_line: Option<u32>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// An Aozora container opened and closed by block sentinels.
    Container {
        /// Container kind name as returned by `container_subtype`.
        subtype: String,
        children: Vec<Self>,
        /// Indent amount or align-end offset; unset for kinds that carry neither.
        #[serde(skip_serializing_if = "Option::is_none")]
        indent_level: Option<u32>,
        #[serde(skip_serializing_if = "Option::is_none")]
        source_line: Option<u32>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// An Aozora page break.
    PageBreak {
        #[serde(skip_serializing_if = "Option::is_none")]
        source_line: Option<u32>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// An Aozora section break.
    SectionBreak {
        /// Section kind name as returned by `section_kind_subtype`.
        subtype: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        source_line: Option<u32>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
}
166
/// One table row: a list of cells, each cell being a list of inlines.
#[derive(Debug, Clone, Serialize)]
pub struct IrTableRow {
    /// Cells in AST child order.
    pub cells: Vec<Vec<IrInline>>,
    /// Range derived from the row node's source position; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub range: Option<Range>,
}
173
/// One list item and the blocks it contains.
#[derive(Debug, Clone, Serialize)]
pub struct IrListItem {
    /// Blocks nested in the item.
    pub children: Vec<IrBlock>,
    /// Range derived from the item node's source position; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub range: Option<Range>,
}
180
/// Column alignment of a table; variant names serialize in camelCase.
#[derive(Debug, Clone, Copy, Serialize)]
#[serde(rename_all = "camelCase")]
pub enum IrTableAlign {
    Left,
    Center,
    Right,
    /// No explicit alignment (maps from `TableAlignment::None`).
    Default,
}
189
/// An inline-level IR node.
///
/// Serialized as an internally tagged object: the variant name, in camelCase, is stored under
/// the `kind` key. Every `Option` field is omitted from the output when it is `None`.
///
/// Inlines projected from Aozora registry nodes (`Ruby`, `DoubleRuby`, `Bouten`, `Gaiji`,
/// `Tcy`, `Annotation`) are built in this file with `range: None`.
///
/// NOTE(review): the enum-level `rename_all` renames the variant tags only. Struct-variant
/// fields such as `fallback_text` keep their Rust names unless `rename_all_fields` is also
/// set — confirm this matches what the consumer expects.
#[derive(Debug, Clone, Serialize)]
#[serde(tag = "kind", rename_all = "camelCase")]
pub enum IrInline {
    /// Plain text.
    Text {
        value: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// Inline code; `value` is the code literal.
    Code {
        value: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// Strong emphasis.
    Strong {
        children: Vec<Self>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// Emphasis.
    Emphasis {
        children: Vec<Self>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// A link; `title` is unset when the source title is empty.
    Link {
        href: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        title: Option<String>,
        children: Vec<Self>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// An image; `alt` holds the inlines collected from the image node's children.
    Image {
        url: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        title: Option<String>,
        alt: Vec<Self>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// A line break; `hard` is `false` for a soft break.
    LineBreak {
        hard: bool,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// Ruby: base inlines, the reading flattened to a string, and the source node's
    /// `delim_explicit` flag.
    Ruby {
        base: Vec<Self>,
        reading: String,
        explicit: bool,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// Double ruby; `base` is projected from the source node's content.
    DoubleRuby {
        base: Vec<Self>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// Bouten (emphasis marks) over `children`; `style` and `position` are the names
    /// returned by `bouten_kind_str` and `bouten_position_str`.
    Bouten {
        children: Vec<Self>,
        style: String,
        position: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// A gaiji (external character) reference.
    Gaiji {
        /// Resolved character(s) as a string, when the source node carries a resolution.
        #[serde(skip_serializing_if = "Option::is_none")]
        codepoint: Option<String>,
        /// Source description; unset when empty.
        #[serde(skip_serializing_if = "Option::is_none")]
        description: Option<String>,
        /// Never populated by the projection in this file.
        #[serde(skip_serializing_if = "Option::is_none")]
        fallback_text: Option<String>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// Tate-chu-yoko text.
    Tcy {
        text: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
    /// An annotation: `payload` is the raw annotation text, `resolved` the kind name
    /// returned by `annotation_kind_resolved` when it has one.
    Annotation {
        payload: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        resolved: Option<String>,
        #[serde(skip_serializing_if = "Option::is_none")]
        range: Option<Range>,
    },
}
299
/// A diagnostic attached to an `IrDocument`.
///
/// No code visible in this file constructs one; the field meanings below follow the field
/// names and should be confirmed against the producer.
#[derive(Debug, Clone, Serialize)]
pub struct IrDiagnostic {
    /// Severity label, as a free-form string.
    pub level: String,
    /// Human-readable message.
    pub message: String,
    /// Optional diagnostic code; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code: Option<String>,
    /// Optional location; omitted from output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub range: Option<Range>,
}
309
/// A `from..to` position pair with `from <= to`.
///
/// For AST-derived ranges, `sourcepos_to_range` in this file packs each endpoint as
/// `(line - 1) * 1024 + (column - 1)`, so the values are not plain byte offsets.
#[derive(Debug, Clone, Copy, Serialize)]
pub struct Range {
    /// Start position.
    pub from: u32,
    /// End position.
    pub to: u32,
}
315
316pub(crate) fn build_ir<'a>(
328 root: &'a AstNode<'a>,
329 lex_out: Option<&BorrowedLexOutput<'a>>,
330) -> IrDocument {
331 let nodes = lex_out
332 .map(flatten_registry_in_source_order)
333 .unwrap_or_default();
334 let mut walker = IrWalker::new(nodes.as_slice());
335 walker.walk_root(root);
336 IrDocument {
337 blocks: walker.finish(),
338 diagnostics: Vec::new(),
339 }
340}
341
/// Incremental IR builder that walks one top-level AST node per call.
///
/// It owns the flattened registry nodes and remembers how far the sentinel cursor has
/// advanced, so successive `walk_block` calls continue where the previous one stopped.
#[derive(Debug)]
pub struct StreamingIrBuilder<'src> {
    /// Registry nodes flattened in source order (empty when no lex output was given).
    nodes: Vec<NodeRef<'src>>,
    /// Cursor position to resume from on the next `walk_block` call.
    cursor_idx: usize,
}
361
362impl<'src> StreamingIrBuilder<'src> {
363 #[must_use]
366 pub fn new(lex_out: Option<&BorrowedLexOutput<'src>>) -> Self {
367 Self {
368 nodes: lex_out
369 .map(flatten_registry_in_source_order)
370 .unwrap_or_default(),
371 cursor_idx: 0,
372 }
373 }
374
375 pub fn walk_block<'a>(&mut self, node: &'a AstNode<'a>) -> Vec<IrBlock> {
379 let mut walker = IrWalker::with_cursor_idx(self.nodes.as_slice(), self.cursor_idx);
380 walker.walk_top(node);
381 let next_idx = walker.cursor.position();
382 let blocks = walker.finish();
383 self.cursor_idx = next_idx;
384 blocks
385 }
386}
387
/// Walks AST nodes and accumulates IR blocks, pairing sentinel characters found in the text
/// with registry nodes drawn from `cursor`.
struct IrWalker<'c, 'src> {
    /// Cursor over the flattened registry nodes; advanced as sentinels are consumed.
    cursor: SentinelCursor<'c, 'src>,
    /// Finished blocks that are not inside any open container.
    top: Vec<IrBlock>,
    /// Stack of containers opened but not yet closed; the last entry is the innermost.
    open: Vec<OpenContainer>,
}
416
/// A container that has been opened by a block sentinel and is still collecting children.
struct OpenContainer {
    /// Kind carried by the opening registry node.
    kind: ContainerKind,
    /// Source line of the paragraph that opened the container.
    source_line: Option<u32>,
    /// Blocks placed inside the container so far.
    children: Vec<IrBlock>,
}
422
423impl<'c, 'src> IrWalker<'c, 'src> {
424 fn new(nodes: &'c [NodeRef<'src>]) -> Self {
425 Self {
426 cursor: SentinelCursor::new(nodes),
427 top: Vec::new(),
428 open: Vec::new(),
429 }
430 }
431
432 fn with_cursor_idx(nodes: &'c [NodeRef<'src>], idx: usize) -> Self {
436 Self {
437 cursor: SentinelCursor::with_position(nodes, idx),
438 top: Vec::new(),
439 open: Vec::new(),
440 }
441 }
442
443 fn finish(mut self) -> Vec<IrBlock> {
446 while let Some(open) = self.open.pop() {
447 let block = open.into_block();
448 place_in(&mut self.open, &mut self.top, block);
449 }
450 self.top
451 }
452
453 fn walk_root<'a>(&mut self, root: &'a AstNode<'a>) {
454 for child in root.children() {
455 self.walk_top(child);
456 }
457 }
458
459 fn walk_top<'a>(&mut self, node: &'a AstNode<'a>) {
460 let (source_line, is_paragraph) = top_metadata(node);
461 if is_paragraph && let Some(action) = self.classify_paragraph(node) {
462 self.dispatch_paragraph(action, source_line);
463 return;
464 }
465 if let Some(block) = self.walk_block(node, true) {
466 place_in(&mut self.open, &mut self.top, block);
467 }
468 }
469
470 fn classify_paragraph<'a>(&self, node: &'a AstNode<'a>) -> Option<ParagraphAction<'src>> {
474 if let Some(kind) = paragraph_sole_block_sentinel(node) {
475 return Some(ParagraphAction::BlockSentinel(kind));
476 }
477 let scan = ParaScan::run(node, &self.cursor);
478 if let Some(hint) = scan.first_heading_hint {
479 return Some(ParagraphAction::HeadingHint {
480 hint,
481 sentinels_to_consume: scan.total_sentinels,
482 });
483 }
484 None
485 }
486
487 fn dispatch_paragraph(&mut self, action: ParagraphAction<'src>, source_line: u32) {
488 match action {
489 ParagraphAction::BlockSentinel(kind) => self.handle_block_sentinel(kind, source_line),
490 ParagraphAction::HeadingHint {
491 hint,
492 sentinels_to_consume,
493 } => self.handle_heading_hint(hint, sentinels_to_consume, source_line),
494 }
495 }
496
497 fn handle_block_sentinel(&mut self, kind: BlockSentinelKind, source_line: u32) {
498 let Some(node_ref) = self.cursor.next() else {
499 return;
500 };
501 match (kind, node_ref) {
502 (BlockSentinelKind::Leaf, NodeRef::BlockLeaf(leaf)) => {
503 if let Some(block) = project_block_leaf(leaf, source_line) {
504 place_in(&mut self.open, &mut self.top, block);
505 }
506 }
507 (BlockSentinelKind::Open, NodeRef::BlockOpen(ck)) => {
508 self.open.push(OpenContainer {
509 kind: ck,
510 source_line: Some(source_line),
511 children: Vec::new(),
512 });
513 }
514 (BlockSentinelKind::Close, NodeRef::BlockClose(_)) => {
515 if let Some(open) = self.open.pop() {
516 let block = open.into_block();
517 place_in(&mut self.open, &mut self.top, block);
518 }
519 }
522 _ => {}
523 }
524 }
525
526 fn handle_heading_hint(
527 &mut self,
528 hint: &'src HeadingHint<'src>,
529 sentinels_to_consume: usize,
530 source_line: u32,
531 ) {
532 self.cursor.advance(sentinels_to_consume);
533 let block = IrBlock::Heading {
534 level: hint.level.clamp(1, 6),
535 children: vec![IrInline::Text {
536 value: hint.target.as_str().to_owned(),
537 range: None,
538 }],
539 source_line: Some(source_line),
540 range: None,
541 };
542 place_in(&mut self.open, &mut self.top, block);
543 }
544
545 fn walk_block<'a>(&mut self, node: &'a AstNode<'a>, top_level: bool) -> Option<IrBlock> {
546 let data = node.data.borrow();
547 let source_line = top_level.then(|| to_u32(data.sourcepos.start.line).max(1));
548 let range = sourcepos_to_range(&data.sourcepos);
549 match &data.value {
550 NodeValue::Paragraph => {
551 drop(data);
552 Some(IrBlock::Paragraph {
553 children: self.collect_inlines(node),
554 source_line,
555 range,
556 })
557 }
558 NodeValue::Heading(NodeHeading { level, .. }) => {
559 let level = (*level).clamp(1, 6);
560 drop(data);
561 Some(IrBlock::Heading {
562 level,
563 children: self.collect_inlines(node),
564 source_line,
565 range,
566 })
567 }
568 NodeValue::BlockQuote => {
569 drop(data);
570 Some(IrBlock::Blockquote {
571 children: self.collect_blocks(node),
572 source_line,
573 range,
574 })
575 }
576 NodeValue::List(NodeList {
577 list_type, start, ..
578 }) => {
579 let ordered = matches!(list_type, ListType::Ordered);
580 let start = (*start > 1).then(|| to_u32(*start));
581 drop(data);
582 Some(IrBlock::List {
583 ordered,
584 start,
585 items: self.collect_list_items(node),
586 source_line,
587 range,
588 })
589 }
590 NodeValue::CodeBlock(code) => {
591 let lang = (!code.info.is_empty()).then(|| code.info.clone());
592 let value = code.literal.clone();
593 drop(data);
594 Some(IrBlock::CodeBlock {
595 lang,
596 value,
597 source_line,
598 range,
599 })
600 }
601 NodeValue::ThematicBreak => {
602 drop(data);
603 Some(IrBlock::ThematicBreak { source_line, range })
604 }
605 NodeValue::Table(table) => {
606 let aligns: Vec<IrTableAlign> =
607 table.alignments.iter().copied().map(table_align).collect();
608 drop(data);
609 Some(self.walk_table(
610 node,
611 TableMeta {
612 align: aligns,
613 source_line,
614 range,
615 },
616 ))
617 }
618 _ => None,
623 }
624 }
625
626 fn walk_table<'a>(&mut self, node: &'a AstNode<'a>, meta: TableMeta) -> IrBlock {
627 let mut rows: Vec<IrTableRow> = Vec::new();
628 for child in node.children() {
629 rows.push(self.collect_table_row(child));
630 }
631 let header = rows.first().cloned().unwrap_or(IrTableRow {
632 cells: Vec::new(),
633 range: None,
634 });
635 let body = if rows.is_empty() {
636 Vec::new()
637 } else {
638 rows[1..].to_vec()
639 };
640 IrBlock::Table {
641 header,
642 rows: body,
643 align: meta.align,
644 source_line: meta.source_line,
645 range: meta.range,
646 }
647 }
648
649 fn collect_blocks<'a>(&mut self, node: &'a AstNode<'a>) -> Vec<IrBlock> {
650 let mut out = Vec::new();
651 for child in node.children() {
652 if let Some(block) = self.walk_block(child, false) {
653 out.push(block);
654 }
655 }
656 out
657 }
658
659 fn collect_list_items<'a>(&mut self, node: &'a AstNode<'a>) -> Vec<IrListItem> {
660 let mut out = Vec::new();
661 for child in node.children() {
662 let data = child.data.borrow();
663 let is_item = matches!(data.value, NodeValue::Item(_));
664 let range = sourcepos_to_range(&data.sourcepos);
665 drop(data);
666 if !is_item {
667 continue;
668 }
669 out.push(IrListItem {
670 children: self.collect_blocks(child),
671 range,
672 });
673 }
674 out
675 }
676
677 fn collect_table_row<'a>(&mut self, row: &'a AstNode<'a>) -> IrTableRow {
678 let data = row.data.borrow();
679 let range = sourcepos_to_range(&data.sourcepos);
680 drop(data);
681 let mut cells = Vec::new();
682 for cell in row.children() {
683 cells.push(self.collect_inlines(cell));
684 }
685 IrTableRow { cells, range }
686 }
687
688 fn collect_inlines<'a>(&mut self, node: &'a AstNode<'a>) -> Vec<IrInline> {
689 let mut out = Vec::new();
690 for child in node.children() {
691 self.emit_inline(child, &mut out);
692 }
693 out
694 }
695
696 fn emit_inline<'a>(&mut self, node: &'a AstNode<'a>, out: &mut Vec<IrInline>) {
697 let data = node.data.borrow();
698 let range = sourcepos_to_range(&data.sourcepos);
699 match &data.value {
700 NodeValue::Text(s) => {
701 let s = s.clone();
702 drop(data);
703 self.project_text_with_sentinels(&s, range, out);
704 }
705 NodeValue::Code(c) => {
706 let value = c.literal.clone();
707 drop(data);
708 out.push(IrInline::Code { value, range });
709 }
710 NodeValue::Strong => {
711 drop(data);
712 out.push(IrInline::Strong {
713 children: self.collect_inlines(node),
714 range,
715 });
716 }
717 NodeValue::Emph => {
718 drop(data);
719 out.push(IrInline::Emphasis {
720 children: self.collect_inlines(node),
721 range,
722 });
723 }
724 NodeValue::Link(link) => {
725 let href = link.url.clone();
726 let title = (!link.title.is_empty()).then(|| link.title.clone());
727 drop(data);
728 out.push(IrInline::Link {
729 href,
730 title,
731 children: self.collect_inlines(node),
732 range,
733 });
734 }
735 NodeValue::Image(image) => {
736 let url = image.url.clone();
737 let title = (!image.title.is_empty()).then(|| image.title.clone());
738 drop(data);
739 out.push(IrInline::Image {
740 url,
741 title,
742 alt: self.collect_inlines(node),
743 range,
744 });
745 }
746 NodeValue::SoftBreak => {
747 drop(data);
748 out.push(IrInline::LineBreak { hard: false, range });
749 }
750 NodeValue::LineBreak => {
751 drop(data);
752 out.push(IrInline::LineBreak { hard: true, range });
753 }
754 _ => {}
756 }
757 }
758
759 fn project_text_with_sentinels(
760 &mut self,
761 text: &str,
762 range: Option<Range>,
763 out: &mut Vec<IrInline>,
764 ) {
765 if !text.chars().any(is_sentinel_char) {
767 if !text.is_empty() {
768 out.push(IrInline::Text {
769 value: text.to_owned(),
770 range,
771 });
772 }
773 return;
774 }
775 let mut cursor = 0;
776 for (idx, ch) in text.char_indices() {
777 if !is_sentinel_char(ch) {
778 continue;
779 }
780 let head = &text[cursor..idx];
781 if !head.is_empty() {
782 out.push(IrInline::Text {
783 value: head.to_owned(),
784 range,
785 });
786 }
787 cursor = idx + ch.len_utf8();
788 let Some(node_ref) = self.cursor.next() else {
789 continue;
790 };
791 if let NodeRef::Inline(aozora) = node_ref
795 && let Some(inline) = project_inline(aozora)
796 {
797 out.push(inline);
798 }
799 }
800 let tail = &text[cursor..];
801 if !tail.is_empty() {
802 out.push(IrInline::Text {
803 value: tail.to_owned(),
804 range,
805 });
806 }
807 }
808}
809
810fn place_in(open: &mut [OpenContainer], top: &mut Vec<IrBlock>, block: IrBlock) {
813 if let Some(frame) = open.last_mut() {
814 frame.children.push(block);
815 } else {
816 top.push(block);
817 }
818}
819
820impl OpenContainer {
821 fn into_block(self) -> IrBlock {
822 IrBlock::Container {
823 subtype: container_subtype(self.kind).to_owned(),
824 children: self.children,
825 indent_level: container_indent_level(self.kind),
826 source_line: self.source_line,
827 range: None,
828 }
829 }
830}
831
/// Table attributes computed by `walk_block` and handed to `walk_table`.
struct TableMeta {
    /// Per-column alignments.
    align: Vec<IrTableAlign>,
    /// Source line of the table, when it is a top-level block.
    source_line: Option<u32>,
    /// Range derived from the table node's source position.
    range: Option<Range>,
}
837
/// Special handling chosen for a top-level paragraph by `classify_paragraph`.
#[derive(Debug, Clone, Copy)]
enum ParagraphAction<'src> {
    /// The paragraph is a lone block sentinel of the given kind.
    BlockSentinel(BlockSentinelKind),
    /// The paragraph carries a heading hint and is replaced by a heading.
    HeadingHint {
        /// The first heading hint found in the paragraph.
        hint: &'src HeadingHint<'src>,
        /// Total number of sentinels in the paragraph; the cursor skips this many nodes.
        sentinels_to_consume: usize,
    },
}
846
847fn top_metadata<'a>(node: &'a AstNode<'a>) -> (u32, bool) {
848 let data = node.data.borrow();
849 let line = u32::try_from(data.sourcepos.start.line)
850 .unwrap_or(u32::MAX)
851 .max(1);
852 let is_para = matches!(data.value, NodeValue::Paragraph);
853 (line, is_para)
854}
855
/// Result of scanning a paragraph's text for sentinel characters.
struct ParaScan<'src> {
    /// Number of sentinel characters found across the paragraph's text descendants.
    total_sentinels: usize,
    /// First heading hint among the registry nodes peeked at those sentinel positions.
    first_heading_hint: Option<&'src HeadingHint<'src>>,
}
872
873impl<'src> ParaScan<'src> {
874 fn run<'a>(node: &'a AstNode<'a>, cursor: &SentinelCursor<'_, 'src>) -> Self {
875 let mut total_sentinels = 0usize;
876 let mut first_heading_hint = None;
877 for_each_text_descendant(node, |text| {
878 for ch in text.chars() {
879 if !is_sentinel_char(ch) {
880 continue;
881 }
882 if first_heading_hint.is_none()
883 && let Some(NodeRef::Inline(AozoraNode::HeadingHint(h))) =
884 cursor.peek(total_sentinels)
885 {
886 first_heading_hint = Some(h);
887 }
888 total_sentinels += 1;
889 }
890 });
891 Self {
892 total_sentinels,
893 first_heading_hint,
894 }
895 }
896}
897
898fn project_inline(node: AozoraNode<'_>) -> Option<IrInline> {
903 match node {
904 AozoraNode::Ruby(r) => Some(project_ruby(r)),
905 AozoraNode::DoubleRuby(d) => Some(project_double_ruby(d)),
906 AozoraNode::Bouten(b) => Some(project_bouten(b)),
907 AozoraNode::TateChuYoko(t) => Some(project_tcy(t)),
908 AozoraNode::Gaiji(g) => Some(project_gaiji(g)),
909 AozoraNode::Annotation(a) => Some(project_annotation(a)),
910 _ => None,
916 }
917}
918
919fn project_block_leaf(node: AozoraNode<'_>, source_line: u32) -> Option<IrBlock> {
920 match node {
921 AozoraNode::PageBreak => Some(IrBlock::PageBreak {
922 source_line: Some(source_line),
923 range: None,
924 }),
925 AozoraNode::SectionBreak(kind) => Some(IrBlock::SectionBreak {
926 subtype: section_kind_subtype(kind).to_owned(),
927 source_line: Some(source_line),
928 range: None,
929 }),
930 _ => None,
933 }
934}
935
936fn project_ruby(r: &AozoraRuby<'_>) -> IrInline {
937 IrInline::Ruby {
938 base: project_content_inlines(r.base.get()),
939 reading: content_to_string(r.reading.get()),
940 explicit: r.delim_explicit,
941 range: None,
942 }
943}
944
945fn project_double_ruby(d: &AozoraDoubleRuby<'_>) -> IrInline {
946 IrInline::DoubleRuby {
947 base: project_content_inlines(d.content.get()),
948 range: None,
949 }
950}
951
952fn project_bouten(b: &AozoraBouten<'_>) -> IrInline {
953 IrInline::Bouten {
954 children: project_content_inlines(b.target.get()),
955 style: bouten_kind_str(b.kind).to_owned(),
956 position: bouten_position_str(b.position).to_owned(),
957 range: None,
958 }
959}
960
961fn project_tcy(t: &TateChuYoko<'_>) -> IrInline {
962 IrInline::Tcy {
963 text: content_to_string(t.text.get()),
964 range: None,
965 }
966}
967
968fn project_gaiji(g: &AozoraGaiji<'_>) -> IrInline {
969 IrInline::Gaiji {
970 codepoint: g.ucs.map(resolved_to_string),
971 description: (!g.description.is_empty()).then(|| g.description.to_owned()),
972 fallback_text: None,
973 range: None,
974 }
975}
976
977fn project_annotation(a: &AozoraAnnotation<'_>) -> IrInline {
978 IrInline::Annotation {
979 payload: a.raw.as_str().to_owned(),
980 resolved: annotation_kind_resolved(a.kind).map(str::to_owned),
981 range: None,
982 }
983}
984
985fn project_content_inlines(content: Content<'_>) -> Vec<IrInline> {
986 match content {
987 Content::Plain(s) if !s.is_empty() => vec![IrInline::Text {
988 value: s.to_owned(),
989 range: None,
990 }],
991 Content::Segments(segs) => {
992 let mut out = Vec::with_capacity(segs.len());
993 for seg in segs {
994 match *seg {
995 Segment::Text(t) if !t.is_empty() => out.push(IrInline::Text {
996 value: t.to_owned(),
997 range: None,
998 }),
999 Segment::Gaiji(g) => out.push(project_gaiji(g)),
1000 Segment::Annotation(a) => out.push(project_annotation(a)),
1001 _ => {}
1004 }
1005 }
1006 out
1007 }
1008 _ => Vec::new(),
1011 }
1012}
1013
1014fn content_to_string(content: Content<'_>) -> String {
1015 match content {
1016 Content::Plain(s) => s.to_owned(),
1017 Content::Segments(segs) => {
1018 let mut out = String::new();
1019 for seg in segs {
1020 if let Segment::Text(t) = seg {
1021 out.push_str(t);
1022 }
1023 }
1024 out
1025 }
1026 _ => String::new(),
1027 }
1028}
1029
1030fn resolved_to_string(r: Resolved) -> String {
1031 match r {
1032 Resolved::Char(c) => c.to_string(),
1033 Resolved::Multi(s) => s.to_owned(),
1034 }
1035}
1036
/// Maps a bouten kind to the camelCase name stored in `IrInline::Bouten::style`.
///
/// Kinds without an arm here fall back to `"unknown"`.
const fn bouten_kind_str(k: BoutenKind) -> &'static str {
    match k {
        BoutenKind::Goma => "goma",
        BoutenKind::WhiteSesame => "whiteSesame",
        BoutenKind::Circle => "circle",
        BoutenKind::WhiteCircle => "whiteCircle",
        BoutenKind::DoubleCircle => "doubleCircle",
        BoutenKind::Janome => "janome",
        BoutenKind::Cross => "cross",
        BoutenKind::WhiteTriangle => "whiteTriangle",
        BoutenKind::WavyLine => "wavyLine",
        BoutenKind::UnderLine => "underLine",
        BoutenKind::DoubleUnderLine => "doubleUnderLine",
        _ => "unknown",
    }
}
1065
/// Maps a bouten position to the name stored in `IrInline::Bouten::position`.
///
/// Positions without an arm here fall back to `"unknown"`.
const fn bouten_position_str(p: BoutenPosition) -> &'static str {
    match p {
        BoutenPosition::Right => "right",
        BoutenPosition::Left => "left",
        _ => "unknown",
    }
}
1073
/// Maps a section-break kind to the name stored in `IrBlock::SectionBreak::subtype`.
///
/// Kinds without an arm here fall back to `"unknown"`.
const fn section_kind_subtype(kind: SectionKind) -> &'static str {
    match kind {
        SectionKind::Choho => "choho",
        SectionKind::Dan => "dan",
        SectionKind::Spread => "spread",
        _ => "unknown",
    }
}
1082
/// Maps a container kind to the name stored in `IrBlock::Container::subtype`.
///
/// Any payload carried by the kind (indent amount, align-end offset) is ignored here; kinds
/// without an arm fall back to `"unknown"`.
const fn container_subtype(kind: ContainerKind) -> &'static str {
    match kind {
        ContainerKind::Indent { .. } => "indent",
        ContainerKind::Warichu => "warichu",
        ContainerKind::Keigakomi => "keigakomi",
        ContainerKind::AlignEnd { .. } => "alignEnd",
        _ => "unknown",
    }
}
1092
1093const fn container_indent_level(kind: ContainerKind) -> Option<u32> {
1094 match kind {
1098 ContainerKind::Indent { amount } => Some(amount as u32),
1099 ContainerKind::AlignEnd { offset } => Some(offset as u32),
1100 _ => None,
1101 }
1102}
1103
1104const fn annotation_kind_resolved(k: AnnotationKind) -> Option<&'static str> {
1105 match k {
1111 AnnotationKind::Unknown => Some("unknown"),
1112 AnnotationKind::AsIs => Some("asIs"),
1113 AnnotationKind::TextualNote => Some("textualNote"),
1114 AnnotationKind::InvalidRubySpan => Some("invalidRubySpan"),
1115 AnnotationKind::WarichuOpen => Some("warichuOpen"),
1116 AnnotationKind::WarichuClose => Some("warichuClose"),
1117 _ => None,
1118 }
1119}
1120
/// Converts a comrak table alignment to its IR counterpart; `None` maps to `Default`.
fn table_align(a: TableAlignment) -> IrTableAlign {
    match a {
        TableAlignment::Left => IrTableAlign::Left,
        TableAlignment::Center => IrTableAlign::Center,
        TableAlignment::Right => IrTableAlign::Right,
        TableAlignment::None => IrTableAlign::Default,
    }
}
1129
1130fn sourcepos_to_range(s: &Sourcepos) -> Option<Range> {
1131 let from = to_u32(
1136 s.start
1137 .line
1138 .saturating_sub(1)
1139 .saturating_mul(1024)
1140 .saturating_add(s.start.column.saturating_sub(1)),
1141 );
1142 let to = to_u32(
1143 s.end
1144 .line
1145 .saturating_sub(1)
1146 .saturating_mul(1024)
1147 .saturating_add(s.end.column.saturating_sub(1)),
1148 );
1149 (to >= from).then_some(Range { from, to })
1150}
1151
#[cfg(test)]
mod tests {
    use super::*;
    use aozora_syntax::AlignEnd;
    use comrak::nodes::{LineColumn, Sourcepos};

    /// Builds a `Sourcepos` from `(line, column)` pairs.
    fn span(start: (usize, usize), end: (usize, usize)) -> Sourcepos {
        Sourcepos {
            start: LineColumn {
                line: start.0,
                column: start.1,
            },
            end: LineColumn {
                line: end.0,
                column: end.1,
            },
        }
    }

    #[test]
    fn bouten_kind_str_covers_every_upstream_variant() {
        assert_eq!(bouten_kind_str(BoutenKind::Goma), "goma");
        assert_eq!(bouten_kind_str(BoutenKind::WhiteSesame), "whiteSesame");
        assert_eq!(bouten_kind_str(BoutenKind::Circle), "circle");
        assert_eq!(bouten_kind_str(BoutenKind::WhiteCircle), "whiteCircle");
        assert_eq!(bouten_kind_str(BoutenKind::DoubleCircle), "doubleCircle");
        assert_eq!(bouten_kind_str(BoutenKind::Janome), "janome");
        assert_eq!(bouten_kind_str(BoutenKind::Cross), "cross");
        assert_eq!(bouten_kind_str(BoutenKind::WhiteTriangle), "whiteTriangle");
        assert_eq!(bouten_kind_str(BoutenKind::WavyLine), "wavyLine");
        assert_eq!(bouten_kind_str(BoutenKind::UnderLine), "underLine");
        assert_eq!(
            bouten_kind_str(BoutenKind::DoubleUnderLine),
            "doubleUnderLine"
        );
    }

    #[test]
    fn bouten_position_str_covers_left_and_right() {
        let cases = [
            (BoutenPosition::Right, "right"),
            (BoutenPosition::Left, "left"),
        ];
        for (position, expected) in cases {
            assert_eq!(bouten_position_str(position), expected);
        }
    }

    #[test]
    fn section_kind_subtype_covers_every_upstream_variant() {
        let cases = [
            (SectionKind::Choho, "choho"),
            (SectionKind::Dan, "dan"),
            (SectionKind::Spread, "spread"),
        ];
        for (kind, expected) in cases {
            assert_eq!(section_kind_subtype(kind), expected);
        }
    }

    #[test]
    fn container_subtype_and_indent_level_round_trip_each_variant() {
        let indent = ContainerKind::Indent { amount: 3 };
        assert_eq!(container_subtype(indent), "indent");
        assert_eq!(container_indent_level(indent), Some(3));

        let end_offset = AlignEnd { offset: 1 }.offset;
        let align = ContainerKind::AlignEnd { offset: end_offset };
        assert_eq!(container_subtype(align), "alignEnd");
        assert_eq!(container_indent_level(align), Some(1));

        // Kinds without a numeric payload report a name but no level.
        let unlevelled = [
            (ContainerKind::Warichu, "warichu"),
            (ContainerKind::Keigakomi, "keigakomi"),
        ];
        for (kind, name) in unlevelled {
            assert_eq!(container_subtype(kind), name);
            assert!(container_indent_level(kind).is_none());
        }
    }

    #[test]
    fn annotation_kind_resolved_covers_every_named_variant() {
        let cases = [
            (AnnotationKind::Unknown, "unknown"),
            (AnnotationKind::AsIs, "asIs"),
            (AnnotationKind::TextualNote, "textualNote"),
            (AnnotationKind::InvalidRubySpan, "invalidRubySpan"),
            (AnnotationKind::WarichuOpen, "warichuOpen"),
            (AnnotationKind::WarichuClose, "warichuClose"),
        ];
        for (kind, expected) in cases {
            assert_eq!(annotation_kind_resolved(kind), Some(expected));
        }
    }

    #[test]
    fn resolved_to_string_handles_char_and_multi() {
        let single = resolved_to_string(Resolved::Char('a'));
        assert_eq!(single, "a");
        let multi = resolved_to_string(Resolved::Multi("か゚"));
        assert_eq!(multi, "か゚");
    }

    #[test]
    fn project_content_inlines_covers_plain_segments_and_empty() {
        let empty = project_content_inlines(Content::Plain(""));
        assert!(empty.is_empty());

        let plain = project_content_inlines(Content::Plain("hi"));
        assert!(matches!(
            plain.as_slice(),
            [IrInline::Text { value, .. }] if value == "hi"
        ));

        // The empty text segment must be dropped.
        let segs: &[Segment<'_>] = &[Segment::Text("a"), Segment::Text("")];
        let projected = project_content_inlines(Content::Segments(segs));
        assert_eq!(projected.len(), 1);
    }

    #[test]
    fn content_to_string_concatenates_segment_text_only() {
        let plain = content_to_string(Content::Plain("xyz"));
        assert_eq!(plain, "xyz");

        let segs: &[Segment<'_>] = &[Segment::Text("a"), Segment::Text("b")];
        let joined = content_to_string(Content::Segments(segs));
        assert_eq!(joined, "ab");
    }

    #[test]
    fn table_align_maps_every_alignment() {
        let left = table_align(TableAlignment::Left);
        assert!(matches!(left, IrTableAlign::Left));

        let center = table_align(TableAlignment::Center);
        assert!(matches!(center, IrTableAlign::Center));

        let right = table_align(TableAlignment::Right);
        assert!(matches!(right, IrTableAlign::Right));

        let unset = table_align(TableAlignment::None);
        assert!(matches!(unset, IrTableAlign::Default));
    }

    #[test]
    fn sourcepos_to_range_returns_some_for_well_ordered_positions() {
        let pos = span((1, 1), (1, 5));
        let range = sourcepos_to_range(&pos).expect("forward range");
        assert!(range.from <= range.to);
    }

    #[test]
    fn sourcepos_to_range_returns_none_for_inverted_positions() {
        let pos = span((5, 5), (1, 1));
        assert!(sourcepos_to_range(&pos).is_none());
    }
}