1#![forbid(unsafe_code)]
38
39mod code_block_mask;
40pub mod html;
41pub mod ir;
42mod post_process;
43mod sentinels;
44mod source_line_anchors;
45
46#[doc(hidden)]
47pub mod test_support;
48
49pub use aozora_pipeline::{
50 BLOCK_CLOSE_SENTINEL, BLOCK_LEAF_SENTINEL, BLOCK_OPEN_SENTINEL, INLINE_SENTINEL,
51};
52pub use aozora_spec::{Diagnostic, DiagnosticSource, Severity};
53
54use aozora_render::serialize as aozora_serialize;
55use aozora_syntax::borrowed::Arena;
56use comrak::nodes::AstNode;
57
/// Rendering configuration shared by every public entry point in this file.
///
/// Note that the derived `Default` leaves both boolean flags `false` and uses
/// `comrak::Options::default()`, which is *not* the same configuration as
/// [`Options::afm_default`].
#[derive(Debug, Clone, Default)]
pub struct Options<'c> {
    /// Options handed unchanged to `comrak::parse_document` and
    /// `comrak::format_html`.
    pub comrak: comrak::Options<'c>,
    /// When `false`, the render functions skip code-block masking, Aozora
    /// lexing and HTML splicing entirely and return no diagnostics.
    pub aozora_enabled: bool,
    /// When `true`, `render_to_string` and `render_to_ir` collect the
    /// top-level block lines from the parsed tree and pass the HTML through
    /// `source_line_anchors::inject_anchors`. `render_blocks_to_ir` does not
    /// read this flag; it reports a line per block instead.
    pub source_line_anchors: bool,
}
79
80impl Options<'_> {
81 #[must_use]
86 pub fn afm_default() -> Self {
87 let mut comrak = comrak::Options::default();
88 comrak.extension.strikethrough = true;
89 comrak.extension.table = true;
90 comrak.extension.autolink = true;
91 comrak.extension.tasklist = true;
92 comrak.render.hardbreaks = true;
93 Self {
94 comrak,
95 aozora_enabled: true,
96 source_line_anchors: false,
97 }
98 }
99
100 #[must_use]
104 pub fn commonmark_only() -> Self {
105 let mut comrak = comrak::Options::default();
106 comrak.render.r#unsafe = true;
107 Self {
108 comrak,
109 aozora_enabled: false,
110 source_line_anchors: false,
111 }
112 }
113
114 #[must_use]
117 pub fn gfm_only() -> Self {
118 let mut comrak = comrak::Options::default();
119 comrak.extension.strikethrough = true;
120 comrak.extension.table = true;
121 comrak.extension.autolink = true;
122 comrak.extension.tasklist = true;
123 comrak.extension.tagfilter = true;
124 comrak.render.r#unsafe = true;
125 Self {
126 comrak,
127 aozora_enabled: false,
128 source_line_anchors: false,
129 }
130 }
131
132 #[must_use]
141 pub fn with_source_line_anchors(mut self, on: bool) -> Self {
142 self.source_line_anchors = on;
143 self
144 }
145}
146
/// Output of `render_to_string`: the final HTML plus any lexer diagnostics.
#[derive(Debug)]
pub struct Rendered {
    /// HTML for the whole input, after every post-processing step that the
    /// supplied `Options` enabled.
    pub html: String,
    /// Diagnostics taken from the Aozora lexer output. Always empty when
    /// `Options::aozora_enabled` is `false`.
    pub diagnostics: Vec<Diagnostic>,
}
156
/// Output of `render_to_ir`: the IR document, the HTML, and diagnostics, all
/// derived from the same parsed tree.
#[derive(Debug)]
pub struct RenderedIr {
    /// IR built by `ir::build_ir` from the parsed comrak tree.
    pub ir: ir::IrDocument,
    /// HTML for the whole input, produced the same way as in
    /// `render_to_string`.
    pub html: String,
    /// Diagnostics taken from the Aozora lexer output. Always empty when
    /// `Options::aozora_enabled` is `false`.
    pub diagnostics: Vec<Diagnostic>,
}
169
170#[must_use]
191pub fn render_to_string(input: &str, options: &Options<'_>) -> Rendered {
192 if !options.aozora_enabled {
193 let comrak_arena = comrak::Arena::new();
194 let root = comrak::parse_document(&comrak_arena, input, &options.comrak);
195 let anchors = if options.source_line_anchors {
196 source_line_anchors::collect_top_level_lines(root)
197 } else {
198 Vec::new()
199 };
200 let mut html = String::new();
201 comrak::format_html(root, &options.comrak, &mut html)
202 .expect("formatting to a String never fails");
203 let final_html = if options.source_line_anchors {
204 source_line_anchors::inject_anchors(&html, &anchors)
205 } else {
206 html
207 };
208 return Rendered {
209 html: final_html,
210 diagnostics: Vec::new(),
211 };
212 }
213
214 let (masked_source, mask_originals) = code_block_mask::mask_code_block_triggers(input);
219
220 let arena = Arena::new();
221 let lex_out = aozora_pipeline::lex_into_arena(&masked_source, &arena);
222
223 let comrak_arena = comrak::Arena::new();
224 let root = comrak::parse_document(&comrak_arena, lex_out.normalized, &options.comrak);
225 let anchors = if options.source_line_anchors {
226 source_line_anchors::collect_top_level_lines(root)
227 } else {
228 Vec::new()
229 };
230 let mut comrak_html = String::new();
231 comrak::format_html(root, &options.comrak, &mut comrak_html)
232 .expect("formatting to a String never fails");
233
234 let spliced = post_process::splice_aozora_html(&comrak_html, &lex_out);
235 let unmasked = code_block_mask::unmask_html(&spliced, &mask_originals);
236 let html = if options.source_line_anchors {
237 source_line_anchors::inject_anchors(&unmasked, &anchors)
238 } else {
239 unmasked
240 };
241
242 Rendered {
243 html,
244 diagnostics: lex_out.diagnostics,
245 }
246}
247
248#[must_use]
268pub fn render_to_ir(input: &str, options: &Options<'_>) -> RenderedIr {
269 if !options.aozora_enabled {
270 let comrak_arena = comrak::Arena::new();
271 let root = comrak::parse_document(&comrak_arena, input, &options.comrak);
272 let ir_doc = ir::build_ir(root, None);
273 let anchors = if options.source_line_anchors {
274 source_line_anchors::collect_top_level_lines(root)
275 } else {
276 Vec::new()
277 };
278 let mut html = String::new();
279 comrak::format_html(root, &options.comrak, &mut html)
280 .expect("formatting to a String never fails");
281 let final_html = if options.source_line_anchors {
282 source_line_anchors::inject_anchors(&html, &anchors)
283 } else {
284 html
285 };
286 return RenderedIr {
287 ir: ir_doc,
288 html: final_html,
289 diagnostics: Vec::new(),
290 };
291 }
292
293 let (masked_source, mask_originals) = code_block_mask::mask_code_block_triggers(input);
294
295 let arena = Arena::new();
296 let lex_out = aozora_pipeline::lex_into_arena(&masked_source, &arena);
297
298 let comrak_arena = comrak::Arena::new();
299 let root = comrak::parse_document(&comrak_arena, lex_out.normalized, &options.comrak);
300 let ir_doc = ir::build_ir(root, Some(&lex_out));
301 let anchors = if options.source_line_anchors {
302 source_line_anchors::collect_top_level_lines(root)
303 } else {
304 Vec::new()
305 };
306 let mut comrak_html = String::new();
307 comrak::format_html(root, &options.comrak, &mut comrak_html)
308 .expect("formatting to a String never fails");
309
310 let spliced = post_process::splice_aozora_html(&comrak_html, &lex_out);
311 let unmasked = code_block_mask::unmask_html(&spliced, &mask_originals);
312 let html = if options.source_line_anchors {
313 source_line_anchors::inject_anchors(&unmasked, &anchors)
314 } else {
315 unmasked
316 };
317
318 RenderedIr {
319 ir: ir_doc,
320 html,
321 diagnostics: lex_out.diagnostics,
322 }
323}
324
/// One top-level block of the document, as produced by `render_blocks_to_ir`.
#[derive(Debug, Clone)]
pub struct RenderedBlock {
    /// IR blocks the streaming IR builder emitted for this top-level node.
    pub ir: Vec<ir::IrBlock>,
    /// HTML for this node alone, with Aozora HTML spliced in when the Aozora
    /// pass is enabled.
    pub html: String,
    /// Start line of the node as reported by comrak's source position, never
    /// less than 1 and saturated at `u32::MAX`.
    // NOTE(review): with the Aozora pass enabled comrak parses the lexer's
    // normalized text, so this is presumably a line in that text rather than
    // in the caller's input — confirm the two always agree.
    pub source_line: u32,
}
340
341#[must_use]
360pub fn render_blocks_to_ir(
361 input: &str,
362 options: &Options<'_>,
363) -> (Vec<RenderedBlock>, Vec<Diagnostic>) {
364 if !options.aozora_enabled {
365 let comrak_arena = comrak::Arena::new();
366 let root = comrak::parse_document(&comrak_arena, input, &options.comrak);
367 let blocks = collect_rendered_blocks(root, options, None);
368 return (blocks, Vec::new());
369 }
370
371 let (masked_source, _mask_originals) = code_block_mask::mask_code_block_triggers(input);
372 let arena = Arena::new();
373 let lex_out = aozora_pipeline::lex_into_arena(&masked_source, &arena);
374 let comrak_arena = comrak::Arena::new();
375 let root = comrak::parse_document(&comrak_arena, lex_out.normalized, &options.comrak);
376 let blocks = collect_rendered_blocks(root, options, Some(&lex_out));
377 (blocks, lex_out.diagnostics)
378}
379
380fn collect_rendered_blocks<'a>(
381 root: &'a AstNode<'a>,
382 options: &Options<'_>,
383 lex_out: Option<&aozora_pipeline::BorrowedLexOutput<'a>>,
384) -> Vec<RenderedBlock> {
385 let mut ir_builder = ir::StreamingIrBuilder::new(lex_out);
391 let mut blocks = Vec::new();
392 for child in root.children() {
393 let data = child.data.borrow();
394 let line = u32::try_from(data.sourcepos.start.line)
395 .unwrap_or(u32::MAX)
396 .max(1);
397 drop(data);
398 let ir_blocks = ir_builder.walk_block(child);
399 let mut block_html = String::new();
400 comrak::format_html(child, &options.comrak, &mut block_html)
401 .expect("formatting a String never fails");
402 let html_final = if let Some(lo) = lex_out {
403 post_process::splice_aozora_html(&block_html, lo)
404 } else {
405 block_html
406 };
407 blocks.push(RenderedBlock {
408 ir: ir_blocks,
409 html: html_final,
410 source_line: line,
411 });
412 }
413 blocks
414}
415
416#[must_use]
424pub fn serialize(input: &str) -> String {
425 let arena = Arena::new();
426 let lex_out = aozora_pipeline::lex_into_arena(input, &arena);
427 aozora_serialize::serialize(&lex_out)
428}
429
430#[cfg(test)]
431mod tests {
432 use super::*;
433
/// Markup-free text must appear verbatim in the HTML and raise no diagnostics.
#[test]
fn plain_text_round_trips_through_html() {
    let Rendered { html, diagnostics } =
        render_to_string("hello, world", &Options::afm_default());
    assert!(html.contains("hello, world"), "html: {}", html);
    assert!(diagnostics.is_empty());
}
440
/// Serializing text with no Aozora constructs must give the text back as is.
#[test]
fn plain_text_serialize_returns_input_unchanged() {
    let source = "plain text";
    assert_eq!(serialize(source), source);
}
445
/// A ruby annotation must become a `<ruby>` element carrying both the base
/// text and the reading, with no annotation brackets in the output.
#[test]
fn ruby_renders_as_html_ruby_element() {
    let html = render_to_string("|青梅《おうめ》へ", &Options::afm_default()).html;
    assert!(html.contains("<ruby>"), "html: {}", html);
    for expected in ["青梅", "おうめ"] {
        assert!(html.contains(expected));
    }
    assert!(!html.contains("[#"));
}
455
/// A page-break annotation must not leave its bracket marker in the HTML.
#[test]
fn page_break_promotes_and_does_not_leak_brackets() {
    let html = render_to_string("前[#改ページ]後", &Options::afm_default()).html;
    assert!(!html.contains("[#"), "html: {}", html);
}
461
/// An unrecognised annotation may keep its brackets, but only inside the
/// annotation wrapper; none may appear bare.
#[test]
fn unknown_annotation_keeps_brackets_inside_wrapper() {
    let html = render_to_string("前[#ほげふが]後", &Options::afm_default()).html;
    let leaked = contains_bare_bracket(&html);
    assert!(!leaked, "bare bracket leaked in: {}", html);
}
474
/// Ordinary CommonMark must still render normally with the Aozora pass on.
#[test]
fn commonmark_passes_through_with_heading_intact() {
    let html = render_to_string("# Hello\n\nworld", &Options::afm_default()).html;
    assert!(html.contains("<h1>Hello</h1>"), "html: {}", html);
    assert!(html.contains("world"));
}
481
/// `gfm_only` must disable the Aozora pass and enable every GFM toggle.
#[test]
fn gfm_only_options_have_aozora_disabled_and_gfm_extensions_enabled() {
    let Options {
        comrak: md,
        aozora_enabled,
        ..
    } = Options::gfm_only();
    assert!(!aozora_enabled, "gfm_only must skip the aozora pass");

    let ext = &md.extension;
    assert!(ext.strikethrough);
    assert!(ext.table);
    assert!(ext.autolink);
    assert!(ext.tasklist);
    assert!(ext.tagfilter);
    assert!(md.render.r#unsafe);
}
493
/// With `gfm_only`, GFM strikethrough renders while the ruby trigger is left
/// in the output as plain text.
#[test]
fn gfm_only_renders_strikethrough_and_does_not_recognise_ruby() {
    let rendered = render_to_string("~~strike~~ |青梅《おうめ》", &Options::gfm_only());
    let html = rendered.html;

    assert!(html.contains("<del>strike</del>"), "html: {html}");

    let trigger_kept = html.contains("|青梅");
    assert!(trigger_kept, "ruby trigger must survive raw: {html}");

    let has_ruby = html.contains("<ruby>");
    assert!(!has_ruby, "ruby must NOT render in gfm-only: {html}");
}
512
/// Sanity-check the helper itself: a marker in plain text is a leak, a marker
/// inside the annotation wrapper is not, and neither is an absent marker.
#[test]
fn contains_bare_bracket_helper_detects_leaked_marker() {
    let leaking = ["plain [# leak"];
    let clean = [
        "<span class=\"afm-annotation\" hidden>[#</span>",
        "no marker at all",
    ];

    for html in leaking {
        assert!(contains_bare_bracket(html));
    }
    for html in clean {
        assert!(!contains_bare_bracket(html));
    }
}
524
/// Report whether `html` contains a `[#` marker that is neither part of a
/// tag's own markup nor inside an element carrying the `afm-annotation` class.
///
/// This is a text-level heuristic, not an HTML parser:
///
/// * A marker counts as *inside a tag* when the nearest preceding `<` has no
///   `>` after it. "No `>` at all" is handled explicitly, so a tag opening at
///   byte 0 is recognised.
/// * A marker counts as *inside the wrapper* only while the most recent
///   `afm-annotation` element is still open: the text between that class name
///   and the marker must not contain more closing tags than opening tags.
///   A wrapper that has already been closed therefore no longer hides later
///   leaks. Void elements inside the wrapper are counted as opens, which errs
///   on the lenient side.
fn contains_bare_bracket(html: &str) -> bool {
    const NEEDLE: &str = "[#";
    const WRAPPER_MARKER: &str = "afm-annotation";

    html.match_indices(NEEDLE).any(|(at, _)| {
        let prefix = &html[..at];

        let inside_tag = match (prefix.rfind('<'), prefix.rfind('>')) {
            (Some(open), Some(close)) => open > close,
            // A `<` with no `>` after it: still inside that tag.
            (Some(_), None) => true,
            (None, _) => false,
        };

        let in_wrapper = prefix.rfind(WRAPPER_MARKER).is_some_and(|marker| {
            // `marker` lies inside the wrapper's opening tag, so that tag's
            // own `<` is excluded from the counts below.
            let since_wrapper = &prefix[marker..];
            let closes = since_wrapper.matches("</").count();
            // Every `</` also contains a `<`, so this cannot underflow.
            let opens = since_wrapper.matches('<').count() - closes;
            // Depth relative to the wrapper is 1 + opens - closes.
            closes <= opens
        });

        !inside_tag && !in_wrapper
    })
}
545}