ruma_html/sanitizer_config.rs
1#![allow(clippy::disallowed_types)]
2
3use std::collections::{HashMap, HashSet};
4
5pub(crate) mod clean;
6
7use crate::HtmlSanitizerMode;
8
9/// Configuration to sanitize HTML elements and attributes.
10#[derive(Debug, Default, Clone)]
11pub struct SanitizerConfig {
12 /// The mode of the sanitizer, if any.
13 mode: Option<HtmlSanitizerMode>,
14
15 /// Change to the list of elements to replace.
16 ///
17 /// The content is a map of element name to their replacement's element name.
18 replace_elements: Option<List<HashMap<&'static str, &'static str>>>,
19
20 /// Elements to remove.
21 remove_elements: Option<HashSet<&'static str>>,
22
23 /// Whether to remove the rich reply fallback.
24 remove_reply_fallback: bool,
25
26 /// Elements to ignore.
27 ignore_elements: Option<HashSet<&'static str>>,
28
29 /// Change to the list of elements to allow.
30 allow_elements: Option<List<HashSet<&'static str>>>,
31
32 /// Change to the list of attributes to replace per element.
33 ///
34 /// The content is a map of element name to a map of attribute name to their replacement's
35 /// attribute name.
36 replace_attrs: Option<List<HashMap<&'static str, HashMap<&'static str, &'static str>>>>,
37
38 /// Removed attributes per element.
39 remove_attrs: Option<HashMap<&'static str, HashSet<&'static str>>>,
40
41 /// Change to the list of allowed attributes per element.
42 allow_attrs: Option<List<HashMap<&'static str, HashSet<&'static str>>>>,
43
44 /// Denied URI schemes per attribute per element.
45 ///
46 /// The content is a map of element name to a map of attribute name to a set of schemes.
47 deny_schemes: Option<HashMap<&'static str, HashMap<&'static str, HashSet<&'static str>>>>,
48
49 /// Change to the list of allowed URI schemes per attribute per element.
50 ///
51 /// The content is a map of element name to a map of attribute name to a set of schemes.
52 #[allow(clippy::type_complexity)]
53 allow_schemes:
54 Option<List<HashMap<&'static str, HashMap<&'static str, HashSet<&'static str>>>>>,
55
56 /// Removed classes per element.
57 ///
58 /// The content is a map of element name to a set of classes.
59 remove_classes: Option<HashMap<&'static str, HashSet<&'static str>>>,
60
61 /// Change to the list of allowed classes per element.
62 ///
63 /// The content is a map of element name to a set of classes.
64 allow_classes: Option<List<HashMap<&'static str, HashSet<&'static str>>>>,
65
66 /// Maximum nesting level of the elements.
67 max_depth: Option<u32>,
68}
69
70impl SanitizerConfig {
71 /// Constructs an empty `SanitizerConfig` that will not filter any element or attribute.
72 ///
73 /// The list of allowed and replaced elements can be changed with [`Self::allow_elements()`],
74 /// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`],
75 /// [`Self::remove_reply_fallback()`].
76 ///
77 /// The list of allowed and replaced attributes can be changed with
78 /// [`Self::allow_attributes()`], [`Self::replace_attributes()`],
79 /// [`Self::remove_attributes()`], [`Self::allow_schemes()`], [`Self::deny_schemes()`],
80 /// [`Self::allow_classes()`], [`Self::remove_classes()`].
81 pub fn new() -> Self {
82 Self::default()
83 }
84
85 /// Constructs a `SanitizerConfig` with the given mode for filtering elements and attributes.
86 ///
87 /// The mode defines the basic list of allowed and replaced elements and attributes and the
88 /// maximum nesting level of elements.
89 ///
90 /// The list of allowed and replaced elements can be changed with [`Self::allow_elements()`],
91 /// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`],
92 /// [`Self::remove_reply_fallback()`].
93 ///
94 /// The list of allowed and replaced attributes can be changed with
95 /// [`Self::allow_attributes()`], [`Self::replace_attributes()`],
96 /// [`Self::remove_attributes()`], [`Self::allow_schemes()`], [`Self::deny_schemes()`],
97 /// [`Self::allow_classes()`], [`Self::remove_classes()`].
98 pub fn with_mode(mode: HtmlSanitizerMode) -> Self {
99 Self { mode: Some(mode), ..Default::default() }
100 }
101
102 /// Constructs a `SanitizerConfig` that will filter elements and attributes not [suggested in
103 /// the Matrix specification].
104 ///
105 /// The list of allowed and replaced elements can be changed with [`Self::allow_elements()`],
106 /// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`],
107 /// [`Self::remove_reply_fallback()`].
108 ///
109 /// The list of allowed and replaced attributes can be changed with
110 /// [`Self::allow_attributes()`], [`Self::replace_attributes()`],
111 /// [`Self::remove_attributes()`], [`Self::allow_schemes()`], [`Self::deny_schemes()`],
112 /// [`Self::allow_classes()`], [`Self::remove_classes()`].
113 ///
114 /// This is the same as calling `SanitizerConfig::with_mode(HtmlSanitizerMode::Strict)`.
115 ///
116 /// [suggested in the Matrix specification]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
117 pub fn strict() -> Self {
118 Self::with_mode(HtmlSanitizerMode::Strict)
119 }
120
121 /// Constructs a `SanitizerConfig` that will filter elements and attributes not [suggested in
122 /// the Matrix specification], except a few for improved compatibility:
123 ///
124 /// * The `matrix` scheme is allowed in links.
125 ///
126 /// The list of allowed elements can be changed with [`Self::allow_elements()`],
127 /// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`],
128 /// [`Self::remove_reply_fallback()`].
129 ///
130 /// The list of allowed attributes can be changed with [`Self::allow_attributes()`],
131 /// [`Self::replace_attributes()`], [`Self::remove_attributes()`], [`Self::allow_schemes()`],
132 /// [`Self::deny_schemes()`], [`Self::allow_classes()`], [`Self::remove_classes()`].
133 ///
134 /// This is the same as calling `SanitizerConfig::with_mode(HtmlSanitizerMode::Compat)`.
135 ///
136 /// [listed in the Matrix specification]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
137 pub fn compat() -> Self {
138 Self::with_mode(HtmlSanitizerMode::Compat)
139 }
140
141 /// Change the list of replaced HTML elements.
142 ///
143 /// The given list is added to or replaces the list of replacements of the current mode,
144 /// depending on the [`ListBehavior`].
145 ///
146 /// The replacement occurs before the removal, so the replaced element should not be in
147 /// the allowed list of elements, but the replacement element should.
148 ///
149 /// # Parameters
150 ///
151 /// * `elements`: The list of element names replacements.
152 pub fn replace_elements(
153 mut self,
154 elements: impl IntoIterator<Item = NameReplacement>,
155 behavior: ListBehavior,
156 ) -> Self {
157 let content = elements.into_iter().map(|r| r.to_tuple()).collect();
158 self.replace_elements = Some(List { content, behavior });
159 self
160 }
161
162 /// Remove the given HTML elements.
163 ///
164 /// When an element is removed, the element and its children are dropped. If you want to remove
165 /// an element but keep its children, use [`SanitizerConfig::ignore_elements`] or
166 /// [`SanitizerConfig::allow_elements`].
167 ///
168 /// Removing elements has a higher priority than ignoring or allowing. So if an element is in
169 /// this list, it will always be removed.
170 ///
171 /// # Parameters
172 ///
173 /// * `elements`: The list of element names to remove.
174 pub fn remove_elements(mut self, elements: impl IntoIterator<Item = &'static str>) -> Self {
175 self.remove_elements = Some(elements.into_iter().collect());
176 self
177 }
178
179 /// Remove the [rich reply] fallback.
180 ///
181 /// Calling this allows to remove the `mx-reply` element in addition to the list of elements to
182 /// remove.
183 ///
184 /// Removing elements has a higher priority than ignoring or allowing. So if this settings is
185 /// set, `mx-reply` will always be removed.
186 ///
187 /// [rich reply]: https://spec.matrix.org/latest/client-server-api/#rich-replies
188 pub fn remove_reply_fallback(mut self) -> Self {
189 self.remove_reply_fallback = true;
190 self
191 }
192
193 /// Ignore the given HTML elements.
194 ///
195 /// When an element is ignored, the element is dropped and replaced by its children. If you want
196 /// to drop an element and its children, use [`SanitizerConfig::remove_elements`].
197 ///
198 /// Removing elements has a lower priority than removing but a higher priority than allowing.
199 ///
200 /// # Parameters
201 ///
202 /// * `elements`: The list of element names to ignore.
203 pub fn ignore_elements(mut self, elements: impl IntoIterator<Item = &'static str>) -> Self {
204 self.ignore_elements = Some(elements.into_iter().collect());
205 self
206 }
207
208 /// Change the list of allowed HTML elements.
209 ///
210 /// The given list is added to or replaces the list of allowed elements of the current
211 /// mode, depending on the [`ListBehavior`].
212 ///
213 /// If an element is not allowed, it is ignored. If no mode is set and no elements are
214 /// explicitly allowed, all elements are allowed.
215 ///
216 /// # Parameters
217 ///
218 /// * `elements`: The list of element names.
219 pub fn allow_elements(
220 mut self,
221 elements: impl IntoIterator<Item = &'static str>,
222 behavior: ListBehavior,
223 ) -> Self {
224 let content = elements.into_iter().collect();
225 self.allow_elements = Some(List { content, behavior });
226 self
227 }
228
229 /// Change the list of replaced attributes per HTML element.
230 ///
231 /// The given list is added to or replaces the list of replacements of the current mode,
232 /// depending on the [`ListBehavior`].
233 ///
234 /// The replacement occurs before the removal, so the replaced attribute should not be in the
235 /// list of allowed attributes, but the replacement attribute should. Attribute replacement
236 /// occurs before element replacement, so if you want to replace an attribute on an element
237 /// that is set to be replaced, you must use the replaced element's name, not the name of its
238 /// replacement.
239 ///
240 /// # Parameters
241 ///
242 /// * `attrs`: The list of element's attributes replacements.
243 pub fn replace_attributes<'a>(
244 mut self,
245 attrs: impl IntoIterator<Item = ElementAttributesReplacement<'a>>,
246 behavior: ListBehavior,
247 ) -> Self {
248 let content = attrs.into_iter().map(|r| r.to_tuple()).collect();
249 self.replace_attrs = Some(List { content, behavior });
250 self
251 }
252
253 /// Remove the given attributes per HTML element.
254 ///
255 /// Removing attributes has a higher priority than allowing. So if an attribute is in
256 /// this list, it will always be removed.
257 ///
258 /// # Parameters
259 ///
260 /// * `attrs`: The list of attributes per element. The value of `parent` is the element name,
261 /// and `properties` contains attribute names.
262 pub fn remove_attributes<'a>(
263 mut self,
264 attrs: impl IntoIterator<Item = PropertiesNames<'a>>,
265 ) -> Self {
266 self.remove_attrs = Some(attrs.into_iter().map(|a| a.to_tuple()).collect());
267 self
268 }
269
270 /// Change the list of allowed attributes per HTML element.
271 ///
272 /// The given list is added to or replaces the list of allowed attributes of the current
273 /// mode, depending on the [`ListBehavior`].
274 ///
275 /// If an attribute is not allowed, it is removed. If no mode is set and no attributes are
276 /// explicitly allowed, all attributes are allowed.
277 ///
278 /// # Parameters
279 ///
280 /// * `attrs`: The list of attributes per element. The value of `parent` is the element name,
281 /// and `properties` contains attribute names.
282 pub fn allow_attributes<'a>(
283 mut self,
284 attrs: impl IntoIterator<Item = PropertiesNames<'a>>,
285 behavior: ListBehavior,
286 ) -> Self {
287 let content = attrs.into_iter().map(|a| a.to_tuple()).collect();
288 self.allow_attrs = Some(List { content, behavior });
289 self
290 }
291
292 /// Deny the given URI schemes per attribute per HTML element.
293 ///
294 /// Denying schemes has a higher priority than allowing. So if a scheme is in
295 /// this list, it will always be denied.
296 ///
297 /// If a scheme is denied, its element is removed, because it is deemed that the element will
298 /// not be usable without it URI.
299 ///
300 /// # Parameters
301 ///
302 /// * `schemes`: The list of schemes per attribute per element.
303 pub fn deny_schemes<'a>(
304 mut self,
305 schemes: impl IntoIterator<Item = ElementAttributesSchemes<'a>>,
306 ) -> Self {
307 self.deny_schemes = Some(schemes.into_iter().map(|s| s.to_tuple()).collect());
308 self
309 }
310
311 /// Change the list of allowed schemes per attribute per HTML element.
312 ///
313 /// The given list is added to or replaces the list of allowed schemes of the current
314 /// mode, depending on the [`ListBehavior`].
315 ///
316 /// If a scheme is not allowed, it is denied. If a scheme is denied, its element is ignored,
317 /// because it is deemed that the element will not be usable without it URI. If no mode is set
318 /// and no schemes are explicitly allowed, all schemes are allowed.
319 ///
320 /// # Parameters
321 ///
322 /// * `schemes`: The list of schemes per attribute per element.
323 pub fn allow_schemes<'a>(
324 mut self,
325 schemes: impl IntoIterator<Item = ElementAttributesSchemes<'a>>,
326 behavior: ListBehavior,
327 ) -> Self {
328 let content = schemes.into_iter().map(|s| s.to_tuple()).collect();
329 self.allow_schemes = Some(List { content, behavior });
330 self
331 }
332
333 /// Deny the given classes per HTML element.
334 ///
335 /// Removing classes has a higher priority than allowing. So if a class is in
336 /// this list, it will always be removed.
337 ///
338 /// If all the classes of a `class` attribute are removed, the whole attribute is removed.
339 ///
340 /// In the list of classes, the names must match the full class name. `*` can be used as a
341 /// wildcard for any number of characters. So `language` will only match a class named
342 /// `language`, and `language-*` will match any class name starting with `language-`.
343 ///
344 /// # Parameters
345 ///
346 /// * `attrs`: The list of classes per element. The value of `parent` is the element name, and
347 /// `properties` contains classes.
348 pub fn remove_classes<'a>(
349 mut self,
350 classes: impl IntoIterator<Item = PropertiesNames<'a>>,
351 ) -> Self {
352 self.remove_classes = Some(classes.into_iter().map(|c| c.to_tuple()).collect());
353 self
354 }
355
356 /// Change the list of allowed classes per HTML element.
357 ///
358 /// The given list is added, removed or replaces the list of allowed classes of the current
359 /// mode, depending on the [`ListBehavior`].
360 ///
361 /// If a class is not allowed, it is removed. If all the classes of a `class` attribute are
362 /// removed, the whole attribute is removed. If no mode is set and no classes are explicitly
363 /// allowed, all classes are allowed.
364 ///
365 /// In the list of classes, the names must match the full class name. `*` can be used as a
366 /// wildcard for any number of characters. So `language` will only match a class named
367 /// `language`, and `language-*` will match any class name starting with `language-`.
368 ///
369 /// # Parameters
370 ///
371 /// * `attrs`: The list of classes per element. The value of `parent` is the element name, and
372 /// `properties` contains classes.
373 pub fn allow_classes<'a>(
374 mut self,
375 classes: impl IntoIterator<Item = PropertiesNames<'a>>,
376 behavior: ListBehavior,
377 ) -> Self {
378 let content = classes.into_iter().map(|c| c.to_tuple()).collect();
379 self.allow_classes = Some(List { content, behavior });
380 self
381 }
382
383 /// The maximum nesting level of HTML elements.
384 ///
385 /// This overrides the maximum depth set by the mode, if one is set.
386 ///
387 /// All elements that are deeper than the maximum depth will be removed. If no mode is set and
388 /// no maximum depth is explicitly set, elements are not filtered by their nesting level.
389 ///
390 /// # Parameters
391 ///
392 /// * `depth`: The maximum nesting level allowed.
393 pub fn max_depth(mut self, depth: u32) -> Self {
394 self.max_depth = Some(depth);
395 self
396 }
397}
398
399/// A list with a behavior.
400#[derive(Debug, Clone)]
401struct List<T> {
402 /// The content of this list.
403 content: T,
404
405 /// The behavior of this list.
406 behavior: ListBehavior,
407}
408
409impl<T> List<T> {
410 /// Whether this is `ListBehavior::Override`.
411 fn is_override(&self) -> bool {
412 self.behavior == ListBehavior::Override
413 }
414}
415
/// How a list given to a setting interacts with the default list of the current mode.
#[allow(clippy::exhaustive_enums)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ListBehavior {
    /// The given list takes the place of the default list of the current mode, if one is set.
    ///
    /// Without a mode, the given list is the full allow list.
    Override,

    /// The given list is appended to the default list of the current mode, if one is set.
    ///
    /// Without a mode, the given list is the full allow list.
    Add,
}
430
/// A name together with the name that replaces it.
#[allow(clippy::exhaustive_structs)]
#[derive(Debug, Clone, Copy)]
pub struct NameReplacement {
    /// The name that gets replaced.
    pub old: &'static str,
    /// The name used as the replacement.
    pub new: &'static str,
}

impl NameReplacement {
    /// Converts this replacement into an `(old, new)` pair.
    fn to_tuple(self) -> (&'static str, &'static str) {
        let Self { old, new } = self;
        (old, new)
    }
}
446
/// The names of the properties that belong to a parent.
#[allow(clippy::exhaustive_structs)]
#[derive(Debug, Clone, Copy)]
pub struct PropertiesNames<'a> {
    /// The parent's name.
    pub parent: &'static str,
    /// The names of the properties.
    pub properties: &'a [&'static str],
}

impl PropertiesNames<'_> {
    /// Converts this into a `(parent, properties)` pair, collecting the property names into a
    /// set.
    fn to_tuple(self) -> (&'static str, HashSet<&'static str>) {
        let Self { parent, properties } = self;
        let names: HashSet<&'static str> = properties.iter().copied().collect();
        (parent, names)
    }
}
463
464/// The replacement of an element's attributes.
465#[allow(clippy::exhaustive_structs)]
466#[derive(Debug, Clone, Copy)]
467pub struct ElementAttributesReplacement<'a> {
468 /// The name of the element.
469 pub element: &'static str,
470 /// The list of attributes replacements.
471 pub replacements: &'a [NameReplacement],
472}
473
474impl ElementAttributesReplacement<'_> {
475 fn to_tuple(self) -> (&'static str, HashMap<&'static str, &'static str>) {
476 let map = self.replacements.iter().map(|r| r.to_tuple()).collect();
477 (self.element, map)
478 }
479}
480
481/// An element's attributes' URI schemes.
482#[allow(clippy::exhaustive_structs)]
483#[derive(Debug, Clone, Copy)]
484pub struct ElementAttributesSchemes<'a> {
485 /// The name of the element.
486 pub element: &'static str,
487 /// The list of allowed URI schemes per attribute name.
488 ///
489 /// The value of the `parent` is the attribute name and the properties are schemes.
490 pub attr_schemes: &'a [PropertiesNames<'a>],
491}
492
493impl ElementAttributesSchemes<'_> {
494 fn to_tuple(self) -> (&'static str, HashMap<&'static str, HashSet<&'static str>>) {
495 let map = self.attr_schemes.iter().map(|s| s.to_tuple()).collect();
496 (self.element, map)
497 }
498}