ruma_html/sanitizer_config/
clean.rs
1use html5ever::{tendril::StrTendril, Attribute, LocalName};
2use phf::{phf_map, phf_set, Map, Set};
3use wildmatch::WildMatch;
4
5use crate::{ElementData, Html, HtmlSanitizerMode, NodeData, NodeRef, SanitizerConfig};
6
7static ALLOWED_ELEMENTS_STRICT: Set<&str> = phf_set! {
9 "del", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "p", "a",
10 "ul", "ol", "sup", "sub", "li", "b", "i", "u", "strong", "em", "s",
11 "code", "hr", "br", "div", "table", "thead", "tbody", "tr", "th", "td",
12 "caption", "pre", "span", "img", "details", "summary", "mx-reply",
13};
14
/// Name of the element that `node_action` removes when the
/// `remove_reply_fallback` option is enabled.
const RICH_REPLY_ELEMENT_NAME: &str = "mx-reply";
17
18static DEPRECATED_ELEMENTS: Map<&str, &str> = phf_map! {
20 "font" => "span",
21 "strike" => "s",
22};
23
24static ALLOWED_ATTRIBUTES_STRICT: Map<&str, &Set<&str>> = phf_map! {
26 "span" => &ALLOWED_ATTRIBUTES_SPAN_STRICT,
27 "a" => &ALLOWED_ATTRIBUTES_A_STRICT,
28 "img" => &ALLOWED_ATTRIBUTES_IMG_STRICT,
29 "ol" => &ALLOWED_ATTRIBUTES_OL_STRICT,
30 "code" => &ALLOWED_ATTRIBUTES_CODE_STRICT,
31 "div" => &ALLOWED_ATTRIBUTES_DIV_STRICT,
32};
33static ALLOWED_ATTRIBUTES_SPAN_STRICT: Set<&str> =
34 phf_set! { "data-mx-bg-color", "data-mx-color", "data-mx-spoiler", "data-mx-maths" };
35static ALLOWED_ATTRIBUTES_A_STRICT: Set<&str> = phf_set! { "target", "href" };
36static ALLOWED_ATTRIBUTES_IMG_STRICT: Set<&str> =
37 phf_set! { "width", "height", "alt", "title", "src" };
38static ALLOWED_ATTRIBUTES_OL_STRICT: Set<&str> = phf_set! { "start" };
39static ALLOWED_ATTRIBUTES_CODE_STRICT: Set<&str> = phf_set! { "class" };
40static ALLOWED_ATTRIBUTES_DIV_STRICT: Set<&str> = phf_set! { "data-mx-maths" };
41
42static DEPRECATED_ATTRS: Map<&str, &Map<&str, &str>> = phf_map! {
45 "font" => &DEPRECATED_ATTRIBUTES_FONT,
46};
47static DEPRECATED_ATTRIBUTES_FONT: Map<&str, &str> = phf_map! { "color" => "data-mx-color" };
48
49static ALLOWED_SCHEMES_STRICT: Map<&str, &Map<&str, &Set<&str>>> = phf_map! {
51 "a" => &ALLOWED_SCHEMES_A_STRICT,
52 "img" => &ALLOWED_SCHEMES_IMG_STRICT,
53};
54static ALLOWED_SCHEMES_A_STRICT: Map<&str, &Set<&str>> = phf_map! {
55 "href" => &ALLOWED_SCHEMES_A_HREF_STRICT,
56};
57pub(crate) static ALLOWED_SCHEMES_A_HREF_STRICT: Set<&str> =
58 phf_set! { "http", "https", "ftp", "mailto", "magnet" };
59static ALLOWED_SCHEMES_IMG_STRICT: Map<&str, &Set<&str>> = phf_map! {
60 "src" => &ALLOWED_SCHEMES_IMG_SRC_STRICT,
61};
62static ALLOWED_SCHEMES_IMG_SRC_STRICT: Set<&str> = phf_set! { "mxc" };
63
64static ALLOWED_SCHEMES_COMPAT: Map<&str, &Map<&str, &Set<&str>>> = phf_map! {
75 "a" => &ALLOWED_SCHEMES_A_COMPAT,
76};
77static ALLOWED_SCHEMES_A_COMPAT: Map<&str, &Set<&str>> = phf_map! {
78 "href" => &ALLOWED_SCHEMES_A_HREF_COMPAT,
79};
80pub(crate) static ALLOWED_SCHEMES_A_HREF_COMPAT: Set<&str> = phf_set! { "matrix" };
81
82static ALLOWED_CLASSES_STRICT: Map<&str, &Set<&str>> =
84 phf_map! { "code" => &ALLOWED_CLASSES_CODE_STRICT };
85static ALLOWED_CLASSES_CODE_STRICT: Set<&str> = phf_set! { "language-*" };
86
/// Nesting depth limit used by `max_depth_value` when a sanitizer mode is set
/// and no explicit `max_depth` is configured.
const MAX_DEPTH_STRICT: u32 = 100;
89
90impl SanitizerConfig {
91 fn use_strict(&self) -> bool {
93 self.mode.is_some()
94 }
95
96 fn use_compat(&self) -> bool {
98 self.mode.is_some_and(|m| m == HtmlSanitizerMode::Compat)
99 }
100
101 fn max_depth_value(&self) -> Option<u32> {
103 self.max_depth.or_else(|| self.use_strict().then_some(MAX_DEPTH_STRICT))
104 }
105
106 pub(crate) fn clean(&self, html: &Html) {
108 for child in html.children() {
109 self.clean_node(child, 0);
110 }
111 }
112
113 fn clean_node(&self, node: NodeRef, depth: u32) {
114 let node = self.apply_replacements(node);
115
116 let action = self.node_action(&node, depth);
117
118 if action != NodeAction::Remove {
119 for child in node.children() {
120 if action == NodeAction::Ignore {
121 child.insert_before_sibling(&node);
122 }
123
124 self.clean_node(child, depth + 1);
125 }
126 }
127
128 if matches!(action, NodeAction::Ignore | NodeAction::Remove) {
129 node.detach();
130 } else if let Some(data) = node.as_element() {
131 self.clean_element_attributes(data);
132 }
133 }
134
135 fn apply_replacements(&self, node: NodeRef) -> NodeRef {
139 let mut element_replacement = None;
140
141 if let NodeData::Element(ElementData { name, attrs, .. }) = node.data() {
142 let element_name = name.local.as_ref();
143
144 let list_replacements =
146 self.replace_attrs.as_ref().and_then(|list| list.content.get(element_name));
147 let list_is_override =
148 self.replace_attrs.as_ref().map(|list| list.is_override()).unwrap_or_default();
149 let mode_replacements = (!list_is_override && self.use_strict())
150 .then(|| DEPRECATED_ATTRS.get(element_name))
151 .flatten();
152
153 if list_replacements.is_some() || mode_replacements.is_some() {
154 let mut attrs = attrs.borrow_mut();
155 *attrs = attrs
156 .clone()
157 .into_iter()
158 .map(|mut attr| {
159 let attr_name = attr.name.local.as_ref();
160
161 let attr_replacement = list_replacements
162 .and_then(|s| s.get(attr_name))
163 .or_else(|| mode_replacements.and_then(|s| s.get(attr_name)))
164 .copied();
165
166 if let Some(attr_replacement) = attr_replacement {
167 attr.name.local = LocalName::from(attr_replacement);
168 }
169
170 attr
171 })
172 .collect();
173 }
174
175 element_replacement = self
177 .replace_elements
178 .as_ref()
179 .and_then(|list| list.content.get(element_name))
180 .copied();
181
182 if element_replacement.is_none() {
183 let list_is_override = self
184 .replace_elements
185 .as_ref()
186 .map(|list| list.is_override())
187 .unwrap_or_default();
188 element_replacement = (!list_is_override && self.use_strict())
189 .then(|| DEPRECATED_ELEMENTS.get(element_name))
190 .flatten()
191 .copied();
192 }
193 }
194
195 if let Some(element_replacement) = element_replacement {
196 node.replace_with_element_name(LocalName::from(element_replacement))
197 } else {
198 node
199 }
200 }
201
202 fn node_action(&self, node: &NodeRef, depth: u32) -> NodeAction {
203 match node.data() {
204 NodeData::Element(ElementData { name, attrs, .. }) => {
205 let element_name = name.local.as_ref();
206 let attrs = attrs.borrow();
207
208 if self.remove_elements.as_ref().is_some_and(|set| set.contains(element_name)) {
210 return NodeAction::Remove;
211 }
212 if self.remove_reply_fallback && element_name == RICH_REPLY_ELEMENT_NAME {
213 return NodeAction::Remove;
214 }
215 if self.max_depth_value().is_some_and(|max| depth >= max) {
216 return NodeAction::Remove;
217 }
218
219 if self.ignore_elements.as_ref().is_some_and(|set| set.contains(element_name)) {
221 return NodeAction::Ignore;
222 }
223
224 if self.allow_elements.is_some() || self.use_strict() {
226 let list_allowed = self
227 .allow_elements
228 .as_ref()
229 .is_some_and(|list| list.content.contains(element_name));
230 let list_is_override = self
231 .allow_elements
232 .as_ref()
233 .map(|list| list.is_override())
234 .unwrap_or_default();
235 let mode_allowed = !list_is_override
236 && self.use_strict()
237 && ALLOWED_ELEMENTS_STRICT.contains(element_name);
238
239 if !list_allowed && !mode_allowed {
240 return NodeAction::Ignore;
241 }
242 }
243
244 if let Some(deny_schemes) =
246 self.deny_schemes.as_ref().and_then(|map| map.get(element_name))
247 {
248 for attr in attrs.iter() {
249 let value = &attr.value;
250 let attr_name = attr.name.local.as_ref();
251
252 if let Some(schemes) = deny_schemes.get(attr_name) {
253 if schemes.iter().any(|scheme| value.starts_with(&format!("{scheme}:")))
255 {
256 return NodeAction::Ignore;
257 }
258 }
259 }
260 }
261
262 if self.allow_schemes.is_none() && !self.use_strict() {
263 return NodeAction::None;
265 }
266
267 let list_element_schemes =
269 self.allow_schemes.as_ref().and_then(|list| list.content.get(element_name));
270 let list_is_override =
271 self.allow_schemes.as_ref().map(|list| list.is_override()).unwrap_or_default();
272 let strict_mode_element_schemes = (!list_is_override && self.use_strict())
273 .then(|| ALLOWED_SCHEMES_STRICT.get(element_name))
274 .flatten();
275 let compat_mode_element_schemes = (!list_is_override && self.use_compat())
276 .then(|| ALLOWED_SCHEMES_COMPAT.get(element_name))
277 .flatten();
278
279 if list_element_schemes.is_none()
280 && strict_mode_element_schemes.is_none()
281 && compat_mode_element_schemes.is_none()
282 {
283 return NodeAction::None;
285 }
286
287 for attr in attrs.iter() {
288 let value = &attr.value;
289 let attr_name = attr.name.local.as_ref();
290
291 let list_attr_schemes = list_element_schemes.and_then(|map| map.get(attr_name));
292 let strict_mode_attr_schemes =
293 strict_mode_element_schemes.and_then(|map| map.get(attr_name));
294 let compat_mode_attr_schemes =
295 compat_mode_element_schemes.and_then(|map| map.get(attr_name));
296
297 if list_attr_schemes.is_none()
298 && strict_mode_attr_schemes.is_none()
299 && compat_mode_attr_schemes.is_none()
300 {
301 return NodeAction::None;
303 }
304
305 let mut allowed_schemes = list_attr_schemes
306 .into_iter()
307 .flatten()
308 .chain(strict_mode_attr_schemes.map(|set| set.iter()).into_iter().flatten())
309 .chain(
310 compat_mode_attr_schemes.map(|set| set.iter()).into_iter().flatten(),
311 );
312
313 if !allowed_schemes.any(|scheme| value.starts_with(&format!("{scheme}:"))) {
315 return NodeAction::Ignore;
316 }
317 }
318
319 NodeAction::None
320 }
321 NodeData::Text(_) => NodeAction::None,
322 _ => NodeAction::Remove,
323 }
324 }
325
326 fn clean_element_attributes(&self, data: &ElementData) {
327 let ElementData { name, attrs } = data;
328 let element_name = name.local.as_ref();
329 let mut attrs = attrs.borrow_mut();
330
331 let list_remove_attrs = self.remove_attrs.as_ref().and_then(|map| map.get(element_name));
332
333 let whitelist_attrs = self.allow_attrs.is_some() || self.use_strict();
334 let list_allow_attrs =
335 self.allow_attrs.as_ref().and_then(|list| list.content.get(element_name));
336 let list_is_override =
337 self.allow_attrs.as_ref().map(|list| list.is_override()).unwrap_or_default();
338 let mode_allow_attrs = (!list_is_override && self.use_strict())
339 .then(|| ALLOWED_ATTRIBUTES_STRICT.get(element_name))
340 .flatten();
341
342 let list_remove_classes =
343 self.remove_classes.as_ref().and_then(|map| map.get(element_name));
344
345 let whitelist_classes = self.allow_classes.is_some() || self.use_strict();
346 let list_allow_classes =
347 self.allow_classes.as_ref().and_then(|list| list.content.get(element_name));
348 let list_is_override =
349 self.allow_classes.as_ref().map(|list| list.is_override()).unwrap_or_default();
350 let mode_allow_classes = (!list_is_override && self.use_strict())
351 .then(|| ALLOWED_CLASSES_STRICT.get(element_name))
352 .flatten();
353
354 let actions: Vec<_> = attrs
355 .iter()
356 .filter_map(|attr| {
357 let value = &attr.value;
358 let attr_name = attr.name.local.as_ref();
359
360 if list_remove_attrs.is_some_and(|set| set.contains(attr_name)) {
362 return Some(AttributeAction::Remove(attr.to_owned()));
363 }
364
365 if whitelist_attrs {
367 let list_allowed = list_allow_attrs.is_some_and(|set| set.contains(attr_name));
368 let mode_allowed = mode_allow_attrs.is_some_and(|set| set.contains(attr_name));
369
370 if !list_allowed && !mode_allowed {
371 return Some(AttributeAction::Remove(attr.to_owned()));
372 }
373 }
374
375 if attr_name == "class" {
377 let mut classes = value.split_whitespace().collect::<Vec<_>>();
378 let initial_len = classes.len();
379
380 if let Some(remove_classes) = list_remove_classes {
382 classes.retain(|class| {
383 for remove_class in remove_classes {
384 if WildMatch::new(remove_class).matches(class) {
385 return false;
386 }
387 }
388
389 true
390 });
391 }
392
393 if whitelist_classes {
395 classes.retain(|class| {
396 let allow_classes = list_allow_classes
397 .map(|set| set.iter())
398 .into_iter()
399 .flatten()
400 .chain(
401 mode_allow_classes.map(|set| set.iter()).into_iter().flatten(),
402 );
403
404 for allow_class in allow_classes {
405 if WildMatch::new(allow_class).matches(class) {
406 return true;
407 }
408 }
409
410 false
411 });
412 }
413
414 if classes.len() == initial_len {
415 return None;
417 }
418
419 if classes.is_empty() {
420 return Some(AttributeAction::Remove(attr.to_owned()));
421 } else {
422 let new_class = classes.join(" ");
423 return Some(AttributeAction::ReplaceValue(
424 attr.to_owned(),
425 new_class.into(),
426 ));
427 }
428 }
429
430 None
431 })
432 .collect();
433
434 for action in actions {
435 match action {
436 AttributeAction::ReplaceValue(attr, value) => {
437 if let Some(mut attr) = attrs.take(&attr) {
438 attr.value = value;
439 attrs.insert(attr);
440 }
441 }
442 AttributeAction::Remove(attr) => {
443 attrs.remove(&attr);
444 }
445 }
446 }
447 }
448}
449
/// What `clean_node` must do with a node, as decided by `node_action`.
#[derive(Debug, PartialEq, Eq)]
enum NodeAction {
    /// Keep the node; its children are cleaned in place and, for an element,
    /// its attributes are cleaned too.
    None,

    /// Drop the node but keep its children, which are moved in front of it
    /// and cleaned.
    Ignore,

    /// Drop the node together with all its descendants.
    Remove,
}
462
463#[derive(Debug)]
465enum AttributeAction {
466 ReplaceValue(Attribute, StrTendril),
468
469 Remove(Attribute),
471}