afm_markdown/
source_line_anchors.rs1use comrak::nodes::AstNode;
28
29pub(crate) fn collect_top_level_lines<'a>(root: &'a AstNode<'a>) -> Vec<usize> {
31 let mut out = Vec::new();
32 for child in root.children() {
33 let line = child.data.borrow().sourcepos.start.line;
34 out.push(line.max(1));
37 }
38 out
39}
40
41pub(crate) fn inject_anchors(html: &str, lines: &[usize]) -> String {
46 if lines.is_empty() {
47 return html.to_owned();
48 }
49 let mut out = String::with_capacity(html.len() + lines.len() * 24);
50 let mut idx = 0_usize;
51 let bytes = html.as_bytes();
52 let mut next_line = 0_usize;
53 let mut depth: i32 = 0;
54 while idx < bytes.len() {
55 let b = bytes[idx];
56 if b == b'<' && idx + 1 < bytes.len() && bytes[idx + 1] != b'/' {
57 if let Some(tag_end) = find_tag_end(bytes, idx) {
60 let tag_slice = &html[idx..tag_end];
61 if depth == 0 && next_line < lines.len() && is_top_level_tag(tag_slice) {
62 out.push_str(&inject_attribute(tag_slice, lines[next_line]));
63 next_line += 1;
64 } else {
65 out.push_str(tag_slice);
66 }
67 if !tag_slice.ends_with("/>") && !is_void_tag(tag_slice) {
68 depth += 1;
69 }
70 idx = tag_end;
71 continue;
72 }
73 }
74 if b == b'<' && idx + 1 < bytes.len() && bytes[idx + 1] == b'/' {
75 if let Some(tag_end) = find_tag_end(bytes, idx) {
77 out.push_str(&html[idx..tag_end]);
78 depth = (depth - 1).max(0);
79 idx = tag_end;
80 continue;
81 }
82 }
83 out.push(b as char);
84 idx += 1;
85 }
86 out
87}
88
/// Finds the end of the tag that begins at `start`.
///
/// Scans forward from `start` and returns the index one past the first `>` that
/// is not inside a single- or double-quoted run, so a `>` within an attribute
/// value does not terminate the tag. Returns `None` when no such `>` exists,
/// including when `start` is at or beyond the end of `bytes`.
fn find_tag_end(bytes: &[u8], start: usize) -> Option<usize> {
    let mut open_quote: Option<u8> = None;
    for (pos, &byte) in bytes.iter().enumerate().skip(start) {
        if let Some(quote) = open_quote {
            // Inside a quoted value only the matching quote character is significant.
            if byte == quote {
                open_quote = None;
            }
            continue;
        }
        match byte {
            b'"' | b'\'' => open_quote = Some(byte),
            b'>' => return Some(pos + 1),
            _ => {}
        }
    }
    None
}
110
111fn is_top_level_tag(tag: &str) -> bool {
112 let name = tag_name(tag);
113 matches!(
114 name,
115 "p" | "h1"
116 | "h2"
117 | "h3"
118 | "h4"
119 | "h5"
120 | "h6"
121 | "ul"
122 | "ol"
123 | "blockquote"
124 | "pre"
125 | "table"
126 | "hr"
127 | "div"
128 | "section"
129 | "details"
130 )
131}
132
133fn is_void_tag(tag: &str) -> bool {
134 let name = tag_name(tag);
138 matches!(name, "hr" | "br" | "img" | "input")
139}
140
/// Extracts the element name from an opening or closing tag.
///
/// Leading `<` and `/` characters and trailing `>` characters are stripped; the
/// name is whatever precedes the first whitespace, `/` or `>` in the remainder.
/// The name is returned exactly as written (no case folding), and an input with
/// no name yields the empty string.
fn tag_name(tag: &str) -> &str {
    let inner = tag
        .trim_start_matches('<')
        .trim_end_matches('>')
        .trim_start_matches('/');
    let is_delimiter = |c: char| c.is_whitespace() || c == '>' || c == '/';
    match inner.find(is_delimiter) {
        Some(end) => &inner[..end],
        None => inner,
    }
}
148
/// Returns `tag` with ` data-afm-source-line="<line>"` inserted directly after
/// the element name.
///
/// Input that does not start with `<` is returned unchanged. The element name
/// ends at the first ASCII whitespace character, `/` or `>`; treating every
/// whitespace character (not only space and tab) as a terminator keeps the
/// attribute out of quoted values when a tag's attributes start on a new line.
fn inject_attribute(tag: &str, line: usize) -> String {
    if !tag.starts_with('<') {
        return tag.to_owned();
    }
    // `<` is one byte, so `tag[1..]` is a valid slice; `find` returns a byte
    // offset on a character boundary, which keeps the splits below valid.
    let name_end = tag[1..]
        .find(|c: char| c.is_ascii_whitespace() || c == '/' || c == '>')
        .map_or(tag.len(), |offset| offset + 1);

    let mut out = String::with_capacity(tag.len() + 28);
    out.push_str(&tag[..name_end]);
    out.push_str(" data-afm-source-line=\"");
    out.push_str(&line.to_string());
    out.push('"');
    out.push_str(&tag[name_end..]);
    out
}
173
#[cfg(test)]
mod tests {
    use super::*;

    /// A lone paragraph receives the single supplied line number.
    #[test]
    fn injects_anchor_into_first_paragraph() {
        let rendered = inject_anchors("<p>hello</p>", &[1]);
        let expected = r#"<p data-afm-source-line="1">hello</p>"#;
        assert_eq!(rendered, expected);
    }

    /// Consecutive top-level blocks consume the line numbers in order.
    #[test]
    fn injects_anchors_for_multiple_top_level_blocks() {
        let rendered = inject_anchors("<h1>a</h1><p>b</p>", &[1, 3]);
        let heading = r#"<h1 data-afm-source-line="1">"#;
        let paragraph = r#"<p data-afm-source-line="3">"#;
        assert!(rendered.contains(heading));
        assert!(rendered.contains(paragraph));
    }

    /// Only the outermost element is anchored; blocks nested inside it are not.
    #[test]
    fn does_not_anchor_nested_blocks() {
        let rendered = inject_anchors("<blockquote><p>x</p></blockquote>", &[1]);
        let outer = r#"<blockquote data-afm-source-line="1">"#;
        let inner_prefix = r"<p data-afm-source-line=";
        assert!(rendered.contains(outer));
        assert!(!rendered.contains(inner_prefix));
    }

    /// With no line numbers to hand out, the markup is returned untouched.
    #[test]
    fn no_op_when_lines_is_empty() {
        let source = "<p>x</p>";
        let rendered = inject_anchors(source, &[]);
        assert_eq!(rendered, source);
    }

    /// A void element is anchored and does not swallow the block that follows it.
    #[test]
    fn handles_void_tags_at_top_level() {
        let rendered = inject_anchors("<hr><p>x</p>", &[1, 2]);
        let rule = r#"<hr data-afm-source-line="1">"#;
        let paragraph = r#"<p data-afm-source-line="2">"#;
        assert!(rendered.contains(rule));
        assert!(rendered.contains(paragraph));
    }

    /// Inline elements inside a block never receive an anchor.
    #[test]
    fn ignores_inline_tags() {
        let rendered = inject_anchors("<p><strong>x</strong></p>", &[1]);
        let paragraph = r#"<p data-afm-source-line="1">"#;
        let inline_prefix = r"<strong data-afm-source-line=";
        assert!(rendered.contains(paragraph));
        assert!(!rendered.contains(inline_prefix));
    }

    /// `tag_name` strips the delimiters, attributes and closing slash from a tag.
    #[test]
    fn tag_name_extracts_the_lower_case_element_name() {
        let cases = [
            ("<p>", "p"),
            ("<p class=\"x\">", "p"),
            ("</p>", "p"),
            ("<hr/>", "hr"),
        ];
        for (tag, expected) in cases.iter() {
            assert_eq!(tag_name(tag), *expected);
        }
    }
}