afm_markdown/
sentinels.rs1use aozora_pipeline::{
27 BLOCK_CLOSE_SENTINEL, BLOCK_LEAF_SENTINEL, BLOCK_OPEN_SENTINEL, BorrowedLexOutput,
28 INLINE_SENTINEL,
29};
30use aozora_spec::NormalizedOffset;
31use aozora_syntax::borrowed::NodeRef;
32use comrak::nodes::{AstNode, NodeValue};
33
34#[derive(Debug, Clone, Copy, PartialEq, Eq)]
36pub(crate) enum BlockSentinelKind {
37 Leaf,
38 Open,
39 Close,
40}
41
42impl BlockSentinelKind {
43 #[inline]
46 pub(crate) const fn from_char(ch: char) -> Option<Self> {
47 match ch {
48 BLOCK_LEAF_SENTINEL => Some(Self::Leaf),
49 BLOCK_OPEN_SENTINEL => Some(Self::Open),
50 BLOCK_CLOSE_SENTINEL => Some(Self::Close),
51 _ => None,
52 }
53 }
54}
55
56#[inline]
64pub(crate) const fn is_sentinel_char(ch: char) -> bool {
65 (ch as u32).wrapping_sub(INLINE_SENTINEL as u32) < 4
66}
67
68pub(crate) fn sole_block_sentinel(inner: &str) -> Option<BlockSentinelKind> {
72 let trimmed = inner.trim_matches(|c: char| matches!(c, ' ' | '\t' | '\n' | '\r'));
73 let mut chars = trimmed.chars();
74 let first = chars.next()?;
75 if chars.next().is_some() {
76 return None;
77 }
78 BlockSentinelKind::from_char(first)
79}
80
81pub(crate) fn paragraph_sole_block_sentinel<'a>(
91 node: &'a AstNode<'a>,
92) -> Option<BlockSentinelKind> {
93 let mut found: Option<BlockSentinelKind> = None;
94 if walk_text_only_descendants(node, &mut |s| {
95 for ch in s.chars() {
96 if matches!(ch, ' ' | '\t' | '\n' | '\r') {
97 continue;
98 }
99 let Some(kind) = BlockSentinelKind::from_char(ch) else {
100 return ControlFlow::Break;
101 };
102 if found.is_some() {
103 return ControlFlow::Break;
104 }
105 found = Some(kind);
106 }
107 ControlFlow::Continue
108 }) {
109 found
110 } else {
111 None
112 }
113}
114
115enum ControlFlow {
117 Continue,
118 Break,
119}
120
121fn walk_text_only_descendants<'a, F>(node: &'a AstNode<'a>, visit: &mut F) -> bool
127where
128 F: FnMut(&str) -> ControlFlow,
129{
130 for child in node.children() {
131 let data = child.data.borrow();
132 match &data.value {
133 NodeValue::Text(s) => {
134 let s = s.clone();
135 drop(data);
136 match visit(&s) {
137 ControlFlow::Continue => {}
138 ControlFlow::Break => return false,
139 }
140 if child.first_child().is_some() && !walk_text_only_descendants(child, visit) {
141 return false;
142 }
143 }
144 _ => return false,
145 }
146 }
147 true
148}
149
150pub(crate) fn for_each_text_descendant<'a, F>(node: &'a AstNode<'a>, mut visit: F)
157where
158 F: FnMut(&str),
159{
160 visit_text_inner(node, &mut visit);
161}
162
163fn visit_text_inner<'a, F>(node: &'a AstNode<'a>, visit: &mut F)
164where
165 F: FnMut(&str),
166{
167 for child in node.children() {
168 let data = child.data.borrow();
169 if let NodeValue::Text(s) = &data.value {
170 let s = s.clone();
171 drop(data);
172 visit(&s);
173 } else {
174 let has_descendants = child.first_child().is_some();
175 drop(data);
176 if has_descendants {
177 visit_text_inner(child, visit);
178 }
179 }
180 }
181}
182
183pub(crate) fn flatten_registry_in_source_order<'a>(
190 lex_out: &BorrowedLexOutput<'a>,
191) -> Vec<NodeRef<'a>> {
192 if lex_out.registry.is_empty() {
193 return Vec::new();
194 }
195 let mut out = Vec::with_capacity(lex_out.registry.len());
196 for (idx, ch) in lex_out.normalized.char_indices() {
197 if !is_sentinel_char(ch) {
198 continue;
199 }
200 let pos = u32::try_from(idx).expect("normalized text fits u32 (Phase 0 cap)");
201 if let Some(node_ref) = lex_out.registry.node_at(NormalizedOffset(pos)) {
202 out.push(node_ref);
203 }
204 }
205 out
206}
207
208pub(crate) struct SentinelCursor<'a, 'src> {
215 nodes: &'a [NodeRef<'src>],
216 idx: usize,
217}
218
219impl<'a, 'src> SentinelCursor<'a, 'src> {
220 pub(crate) fn new(nodes: &'a [NodeRef<'src>]) -> Self {
221 Self { nodes, idx: 0 }
222 }
223
224 pub(crate) fn peek(&self, offset: usize) -> Option<NodeRef<'src>> {
227 self.nodes.get(self.idx + offset).copied()
228 }
229
230 pub(crate) fn next(&mut self) -> Option<NodeRef<'src>> {
232 let n = self.nodes.get(self.idx).copied();
233 if n.is_some() {
234 self.idx += 1;
235 }
236 n
237 }
238
239 pub(crate) fn advance(&mut self, n: usize) {
241 self.idx = self.idx.saturating_add(n).min(self.nodes.len());
242 }
243
244 pub(crate) fn position(&self) -> usize {
248 self.idx
249 }
250
251 pub(crate) fn with_position(nodes: &'a [NodeRef<'src>], pos: usize) -> Self {
254 Self {
255 nodes,
256 idx: pos.min(nodes.len()),
257 }
258 }
259}
260
261#[cfg(test)]
262mod tests {
263 use super::*;
264
265 #[test]
266 fn is_sentinel_char_recognises_all_four() {
267 for ch in [
268 INLINE_SENTINEL,
269 BLOCK_LEAF_SENTINEL,
270 BLOCK_OPEN_SENTINEL,
271 BLOCK_CLOSE_SENTINEL,
272 ] {
273 assert!(is_sentinel_char(ch), "{ch:?} should be a sentinel");
274 }
275 }
276
277 #[test]
278 fn is_sentinel_char_rejects_neighbours() {
279 assert!(!is_sentinel_char('\u{E000}'));
281 assert!(!is_sentinel_char('\u{E005}'));
282 assert!(!is_sentinel_char('a'));
283 assert!(!is_sentinel_char('\0'));
284 }
285
286 #[test]
287 fn block_sentinel_kind_from_char_round_trips() {
288 assert_eq!(
289 BlockSentinelKind::from_char(BLOCK_LEAF_SENTINEL),
290 Some(BlockSentinelKind::Leaf)
291 );
292 assert_eq!(
293 BlockSentinelKind::from_char(BLOCK_OPEN_SENTINEL),
294 Some(BlockSentinelKind::Open)
295 );
296 assert_eq!(
297 BlockSentinelKind::from_char(BLOCK_CLOSE_SENTINEL),
298 Some(BlockSentinelKind::Close)
299 );
300 assert!(BlockSentinelKind::from_char(INLINE_SENTINEL).is_none());
302 assert!(BlockSentinelKind::from_char('a').is_none());
303 }
304
305 #[test]
306 fn sole_block_sentinel_accepts_block_with_whitespace_around() {
307 assert_eq!(
308 sole_block_sentinel(&format!("\n{BLOCK_LEAF_SENTINEL}\n")),
309 Some(BlockSentinelKind::Leaf)
310 );
311 }
312
313 #[test]
314 fn sole_block_sentinel_rejects_inline() {
315 assert!(sole_block_sentinel(&format!("{INLINE_SENTINEL}")).is_none());
316 }
317
318 #[test]
319 fn sole_block_sentinel_rejects_multiple() {
320 let s = format!("{BLOCK_LEAF_SENTINEL}{BLOCK_OPEN_SENTINEL}");
321 assert!(sole_block_sentinel(&s).is_none());
322 }
323
324 #[test]
325 fn sentinel_cursor_peeks_and_consumes_in_order() {
326 use aozora_syntax::ContainerKind;
328 use aozora_syntax::borrowed::AozoraNode;
329 let entries: &[NodeRef<'static>] = &[
330 NodeRef::Inline(AozoraNode::PageBreak),
331 NodeRef::BlockOpen(ContainerKind::Keigakomi),
332 NodeRef::BlockClose(ContainerKind::Keigakomi),
333 ];
334 let mut cursor = SentinelCursor::new(entries);
335 assert!(matches!(
336 cursor.peek(0),
337 Some(NodeRef::Inline(AozoraNode::PageBreak))
338 ));
339 assert!(matches!(
340 cursor.peek(2),
341 Some(NodeRef::BlockClose(ContainerKind::Keigakomi))
342 ));
343 assert!(cursor.peek(3).is_none());
344 let _ = cursor.next();
345 assert!(matches!(
346 cursor.next(),
347 Some(NodeRef::BlockOpen(ContainerKind::Keigakomi))
348 ));
349 cursor.advance(99); assert!(cursor.next().is_none());
351 }
352}