ruma_html/
sanitizer_config.rs

1#![allow(clippy::disallowed_types)]
2
3use std::collections::{HashMap, HashSet};
4
5pub(crate) mod clean;
6
7use crate::HtmlSanitizerMode;
8
9/// Configuration to sanitize HTML elements and attributes.
10#[derive(Debug, Default, Clone)]
11pub struct SanitizerConfig {
12    /// The mode of the sanitizer, if any.
13    mode: Option<HtmlSanitizerMode>,
14
15    /// Change to the list of elements to replace.
16    ///
17    /// The content is a map of element name to their replacement's element name.
18    replace_elements: Option<List<HashMap<&'static str, &'static str>>>,
19
20    /// Elements to remove.
21    remove_elements: Option<HashSet<&'static str>>,
22
23    /// Whether to remove the rich reply fallback.
24    remove_reply_fallback: bool,
25
26    /// Elements to ignore.
27    ignore_elements: Option<HashSet<&'static str>>,
28
29    /// Change to the list of elements to allow.
30    allow_elements: Option<List<HashSet<&'static str>>>,
31
32    /// Change to the list of attributes to replace per element.
33    ///
34    /// The content is a map of element name to a map of attribute name to their replacement's
35    /// attribute name.
36    replace_attrs: Option<List<HashMap<&'static str, HashMap<&'static str, &'static str>>>>,
37
38    /// Removed attributes per element.
39    remove_attrs: Option<HashMap<&'static str, HashSet<&'static str>>>,
40
41    /// Change to the list of allowed attributes per element.
42    allow_attrs: Option<List<HashMap<&'static str, HashSet<&'static str>>>>,
43
44    /// Denied URI schemes per attribute per element.
45    ///
46    /// The content is a map of element name to a map of attribute name to a set of schemes.
47    deny_schemes: Option<HashMap<&'static str, HashMap<&'static str, HashSet<&'static str>>>>,
48
49    /// Change to the list of allowed URI schemes per attribute per element.
50    ///
51    /// The content is a map of element name to a map of attribute name to a set of schemes.
52    #[allow(clippy::type_complexity)]
53    allow_schemes:
54        Option<List<HashMap<&'static str, HashMap<&'static str, HashSet<&'static str>>>>>,
55
56    /// Removed classes per element.
57    ///
58    /// The content is a map of element name to a set of classes.
59    remove_classes: Option<HashMap<&'static str, HashSet<&'static str>>>,
60
61    /// Change to the list of allowed classes per element.
62    ///
63    /// The content is a map of element name to a set of classes.
64    allow_classes: Option<List<HashMap<&'static str, HashSet<&'static str>>>>,
65
66    /// Maximum nesting level of the elements.
67    max_depth: Option<u32>,
68}
69
70impl SanitizerConfig {
71    /// Constructs an empty `SanitizerConfig` that will not filter any element or attribute.
72    ///
73    /// The list of allowed and replaced elements can be changed with [`Self::allow_elements()`],
74    /// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`],
75    /// [`Self::remove_reply_fallback()`].
76    ///
77    /// The list of allowed and replaced attributes can be changed with
78    /// [`Self::allow_attributes()`], [`Self::replace_attributes()`],
79    /// [`Self::remove_attributes()`], [`Self::allow_schemes()`], [`Self::deny_schemes()`],
80    /// [`Self::allow_classes()`], [`Self::remove_classes()`].
81    pub fn new() -> Self {
82        Self::default()
83    }
84
85    /// Constructs a `SanitizerConfig` with the given mode for filtering elements and attributes.
86    ///
87    /// The mode defines the basic list of allowed and replaced elements and attributes and the
88    /// maximum nesting level of elements.
89    ///
90    /// The list of allowed and replaced elements can be changed with [`Self::allow_elements()`],
91    /// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`],
92    /// [`Self::remove_reply_fallback()`].
93    ///
94    /// The list of allowed and replaced attributes can be changed with
95    /// [`Self::allow_attributes()`], [`Self::replace_attributes()`],
96    /// [`Self::remove_attributes()`], [`Self::allow_schemes()`], [`Self::deny_schemes()`],
97    /// [`Self::allow_classes()`], [`Self::remove_classes()`].
98    pub fn with_mode(mode: HtmlSanitizerMode) -> Self {
99        Self { mode: Some(mode), ..Default::default() }
100    }
101
102    /// Constructs a `SanitizerConfig` that will filter elements and attributes not [suggested in
103    /// the Matrix specification].
104    ///
105    /// The list of allowed and replaced elements can be changed with [`Self::allow_elements()`],
106    /// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`],
107    /// [`Self::remove_reply_fallback()`].
108    ///
109    /// The list of allowed and replaced attributes can be changed with
110    /// [`Self::allow_attributes()`], [`Self::replace_attributes()`],
111    /// [`Self::remove_attributes()`], [`Self::allow_schemes()`], [`Self::deny_schemes()`],
112    /// [`Self::allow_classes()`], [`Self::remove_classes()`].
113    ///
114    /// This is the same as calling `SanitizerConfig::with_mode(HtmlSanitizerMode::Strict)`.
115    ///
116    /// [suggested in the Matrix specification]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
117    pub fn strict() -> Self {
118        Self::with_mode(HtmlSanitizerMode::Strict)
119    }
120
121    /// Constructs a `SanitizerConfig` that will filter elements and attributes not [suggested in
122    /// the Matrix specification], except a few for improved compatibility:
123    ///
124    /// * The `matrix` scheme is allowed in links.
125    ///
126    /// The list of allowed elements can be changed with [`Self::allow_elements()`],
127    /// [`Self::replace_elements()`], [`Self::ignore_elements()`], [`Self::remove_elements()`],
128    /// [`Self::remove_reply_fallback()`].
129    ///
130    /// The list of allowed attributes can be changed with [`Self::allow_attributes()`],
131    /// [`Self::replace_attributes()`], [`Self::remove_attributes()`], [`Self::allow_schemes()`],
132    /// [`Self::deny_schemes()`], [`Self::allow_classes()`], [`Self::remove_classes()`].
133    ///
134    /// This is the same as calling `SanitizerConfig::with_mode(HtmlSanitizerMode::Compat)`.
135    ///
136    /// [listed in the Matrix specification]: https://spec.matrix.org/latest/client-server-api/#mroommessage-msgtypes
137    pub fn compat() -> Self {
138        Self::with_mode(HtmlSanitizerMode::Compat)
139    }
140
141    /// Change the list of replaced HTML elements.
142    ///
143    /// The given list is added to or replaces the list of replacements of the current mode,
144    /// depending on the [`ListBehavior`].
145    ///
146    /// The replacement occurs before the removal, so the replaced element should not be in
147    /// the allowed list of elements, but the replacement element should.
148    ///
149    /// # Parameters
150    ///
151    /// * `elements`: The list of element names replacements.
152    pub fn replace_elements(
153        mut self,
154        elements: impl IntoIterator<Item = NameReplacement>,
155        behavior: ListBehavior,
156    ) -> Self {
157        let content = elements.into_iter().map(|r| r.to_tuple()).collect();
158        self.replace_elements = Some(List { content, behavior });
159        self
160    }
161
162    /// Remove the given HTML elements.
163    ///
164    /// When an element is removed, the element and its children are dropped. If you want to remove
165    /// an element but keep its children, use [`SanitizerConfig::ignore_elements`] or
166    /// [`SanitizerConfig::allow_elements`].
167    ///
168    /// Removing elements has a higher priority than ignoring or allowing. So if an element is in
169    /// this list, it will always be removed.
170    ///
171    /// # Parameters
172    ///
173    /// * `elements`: The list of element names to remove.
174    pub fn remove_elements(mut self, elements: impl IntoIterator<Item = &'static str>) -> Self {
175        self.remove_elements = Some(elements.into_iter().collect());
176        self
177    }
178
179    /// Remove the [rich reply] fallback.
180    ///
181    /// Calling this allows to remove the `mx-reply` element in addition to the list of elements to
182    /// remove.
183    ///
184    /// Removing elements has a higher priority than ignoring or allowing. So if this settings is
185    /// set, `mx-reply` will always be removed.
186    ///
187    /// [rich reply]: https://spec.matrix.org/latest/client-server-api/#rich-replies
188    pub fn remove_reply_fallback(mut self) -> Self {
189        self.remove_reply_fallback = true;
190        self
191    }
192
193    /// Ignore the given HTML elements.
194    ///
195    /// When an element is ignored, the element is dropped and replaced by its children. If you want
196    /// to drop an element and its children, use [`SanitizerConfig::remove_elements`].
197    ///
198    /// Removing elements has a lower priority than removing but a higher priority than allowing.
199    ///
200    /// # Parameters
201    ///
202    /// * `elements`: The list of element names to ignore.
203    pub fn ignore_elements(mut self, elements: impl IntoIterator<Item = &'static str>) -> Self {
204        self.ignore_elements = Some(elements.into_iter().collect());
205        self
206    }
207
208    /// Change the list of allowed HTML elements.
209    ///
210    /// The given list is added to or replaces the list of allowed elements of the current
211    /// mode, depending on the [`ListBehavior`].
212    ///
213    /// If an element is not allowed, it is ignored. If no mode is set and no elements are
214    /// explicitly allowed, all elements are allowed.
215    ///
216    /// # Parameters
217    ///
218    /// * `elements`: The list of element names.
219    pub fn allow_elements(
220        mut self,
221        elements: impl IntoIterator<Item = &'static str>,
222        behavior: ListBehavior,
223    ) -> Self {
224        let content = elements.into_iter().collect();
225        self.allow_elements = Some(List { content, behavior });
226        self
227    }
228
229    /// Change the list of replaced attributes per HTML element.
230    ///
231    /// The given list is added to or replaces the list of replacements of the current mode,
232    /// depending on the [`ListBehavior`].
233    ///
234    /// The replacement occurs before the removal, so the replaced attribute should not be in the
235    /// list of allowed attributes, but the replacement attribute should. Attribute replacement
236    /// occurs before element replacement, so if you want to replace an attribute on an element
237    /// that is set to be replaced, you must use the replaced element's name, not the name of its
238    /// replacement.
239    ///
240    /// # Parameters
241    ///
242    /// * `attrs`: The list of element's attributes replacements.
243    pub fn replace_attributes<'a>(
244        mut self,
245        attrs: impl IntoIterator<Item = ElementAttributesReplacement<'a>>,
246        behavior: ListBehavior,
247    ) -> Self {
248        let content = attrs.into_iter().map(|r| r.to_tuple()).collect();
249        self.replace_attrs = Some(List { content, behavior });
250        self
251    }
252
253    /// Remove the given attributes per HTML element.
254    ///
255    /// Removing attributes has a higher priority than allowing. So if an attribute is in
256    /// this list, it will always be removed.
257    ///
258    /// # Parameters
259    ///
260    /// * `attrs`: The list of attributes per element. The value of `parent` is the element name,
261    ///   and `properties` contains attribute names.
262    pub fn remove_attributes<'a>(
263        mut self,
264        attrs: impl IntoIterator<Item = PropertiesNames<'a>>,
265    ) -> Self {
266        self.remove_attrs = Some(attrs.into_iter().map(|a| a.to_tuple()).collect());
267        self
268    }
269
270    /// Change the list of allowed attributes per HTML element.
271    ///
272    /// The given list is added to or replaces the list of allowed attributes of the current
273    /// mode, depending on the [`ListBehavior`].
274    ///
275    /// If an attribute is not allowed, it is removed. If no mode is set and no attributes are
276    /// explicitly allowed, all attributes are allowed.
277    ///
278    /// # Parameters
279    ///
280    /// * `attrs`: The list of attributes per element. The value of `parent` is the element name,
281    ///   and `properties` contains attribute names.
282    pub fn allow_attributes<'a>(
283        mut self,
284        attrs: impl IntoIterator<Item = PropertiesNames<'a>>,
285        behavior: ListBehavior,
286    ) -> Self {
287        let content = attrs.into_iter().map(|a| a.to_tuple()).collect();
288        self.allow_attrs = Some(List { content, behavior });
289        self
290    }
291
292    /// Deny the given URI schemes per attribute per HTML element.
293    ///
294    /// Denying schemes has a higher priority than allowing. So if a scheme is in
295    /// this list, it will always be denied.
296    ///
297    /// If a scheme is denied, its element is removed, because it is deemed that the element will
298    /// not be usable without it URI.
299    ///
300    /// # Parameters
301    ///
302    /// * `schemes`: The list of schemes per attribute per element.
303    pub fn deny_schemes<'a>(
304        mut self,
305        schemes: impl IntoIterator<Item = ElementAttributesSchemes<'a>>,
306    ) -> Self {
307        self.deny_schemes = Some(schemes.into_iter().map(|s| s.to_tuple()).collect());
308        self
309    }
310
311    /// Change the list of allowed schemes per attribute per HTML element.
312    ///
313    /// The given list is added to or replaces the list of allowed schemes of the current
314    /// mode, depending on the [`ListBehavior`].
315    ///
316    /// If a scheme is not allowed, it is denied. If a scheme is denied, its element is ignored,
317    /// because it is deemed that the element will not be usable without it URI. If no mode is set
318    /// and no schemes are explicitly allowed, all schemes are allowed.
319    ///
320    /// # Parameters
321    ///
322    /// * `schemes`: The list of schemes per attribute per element.
323    pub fn allow_schemes<'a>(
324        mut self,
325        schemes: impl IntoIterator<Item = ElementAttributesSchemes<'a>>,
326        behavior: ListBehavior,
327    ) -> Self {
328        let content = schemes.into_iter().map(|s| s.to_tuple()).collect();
329        self.allow_schemes = Some(List { content, behavior });
330        self
331    }
332
333    /// Deny the given classes per HTML element.
334    ///
335    /// Removing classes has a higher priority than allowing. So if a class is in
336    /// this list, it will always be removed.
337    ///
338    /// If all the classes of a `class` attribute are removed, the whole attribute is removed.
339    ///
340    /// In the list of classes, the names must match the full class name. `*` can be used as a
341    /// wildcard for any number of characters. So `language` will only match a class named
342    /// `language`, and `language-*` will match any class name starting with `language-`.
343    ///
344    /// # Parameters
345    ///
346    /// * `attrs`: The list of classes per element. The value of `parent` is the element name, and
347    ///   `properties` contains classes.
348    pub fn remove_classes<'a>(
349        mut self,
350        classes: impl IntoIterator<Item = PropertiesNames<'a>>,
351    ) -> Self {
352        self.remove_classes = Some(classes.into_iter().map(|c| c.to_tuple()).collect());
353        self
354    }
355
356    /// Change the list of allowed classes per HTML element.
357    ///
358    /// The given list is added, removed or replaces the list of allowed classes of the current
359    /// mode, depending on the [`ListBehavior`].
360    ///
361    /// If a class is not allowed, it is removed. If all the classes of a `class` attribute are
362    /// removed, the whole attribute is removed. If no mode is set and no classes are explicitly
363    /// allowed, all classes are allowed.
364    ///
365    /// In the list of classes, the names must match the full class name. `*` can be used as a
366    /// wildcard for any number of characters. So `language` will only match a class named
367    /// `language`, and `language-*` will match any class name starting with `language-`.
368    ///
369    /// # Parameters
370    ///
371    /// * `attrs`: The list of classes per element. The value of `parent` is the element name, and
372    ///   `properties` contains classes.
373    pub fn allow_classes<'a>(
374        mut self,
375        classes: impl IntoIterator<Item = PropertiesNames<'a>>,
376        behavior: ListBehavior,
377    ) -> Self {
378        let content = classes.into_iter().map(|c| c.to_tuple()).collect();
379        self.allow_classes = Some(List { content, behavior });
380        self
381    }
382
383    /// The maximum nesting level of HTML elements.
384    ///
385    /// This overrides the maximum depth set by the mode, if one is set.
386    ///
387    /// All elements that are deeper than the maximum depth will be removed. If no mode is set and
388    /// no maximum depth is explicitly set, elements are not filtered by their nesting level.
389    ///
390    /// # Parameters
391    ///
392    /// * `depth`: The maximum nesting level allowed.
393    pub fn max_depth(mut self, depth: u32) -> Self {
394        self.max_depth = Some(depth);
395        self
396    }
397}
398
399/// A list with a behavior.
400#[derive(Debug, Clone)]
401struct List<T> {
402    /// The content of this list.
403    content: T,
404
405    /// The behavior of this list.
406    behavior: ListBehavior,
407}
408
409impl<T> List<T> {
410    /// Whether this is `ListBehavior::Override`.
411    fn is_override(&self) -> bool {
412        self.behavior == ListBehavior::Override
413    }
414}
415
/// How a list given to a setting interacts with the default list of the current mode.
#[allow(clippy::exhaustive_enums)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListBehavior {
    /// The given list takes the place of the default list of the current mode, if one is set.
    ///
    /// Without a mode, the given list is the full list.
    Override,

    /// The given list is appended to the default list of the current mode, if one is set.
    ///
    /// Without a mode, the given list is the full list.
    Add,
}
430
/// A pair of names, where the first one is to be replaced by the second one.
#[allow(clippy::exhaustive_structs)]
#[derive(Debug, Clone, Copy)]
pub struct NameReplacement {
    /// The name that gets replaced.
    pub old: &'static str,
    /// The name used as the replacement.
    pub new: &'static str,
}

impl NameReplacement {
    /// Converts this replacement into an `(old, new)` pair.
    fn to_tuple(self) -> (&'static str, &'static str) {
        let Self { old, new } = self;
        (old, new)
    }
}
446
/// The names of the properties that belong to a parent.
#[allow(clippy::exhaustive_structs)]
#[derive(Debug, Clone, Copy)]
pub struct PropertiesNames<'a> {
    /// The parent's name.
    pub parent: &'static str,
    /// The names of the properties.
    pub properties: &'a [&'static str],
}

impl PropertiesNames<'_> {
    /// Converts this into a `(parent, properties)` pair, with the properties deduplicated into
    /// a set.
    fn to_tuple(self) -> (&'static str, HashSet<&'static str>) {
        let Self { parent, properties } = self;

        let mut names = HashSet::with_capacity(properties.len());
        names.extend(properties.iter().copied());

        (parent, names)
    }
}
463
464/// The replacement of an element's attributes.
465#[allow(clippy::exhaustive_structs)]
466#[derive(Debug, Clone, Copy)]
467pub struct ElementAttributesReplacement<'a> {
468    /// The name of the element.
469    pub element: &'static str,
470    /// The list of attributes replacements.
471    pub replacements: &'a [NameReplacement],
472}
473
474impl ElementAttributesReplacement<'_> {
475    fn to_tuple(self) -> (&'static str, HashMap<&'static str, &'static str>) {
476        let map = self.replacements.iter().map(|r| r.to_tuple()).collect();
477        (self.element, map)
478    }
479}
480
481/// An element's attributes' URI schemes.
482#[allow(clippy::exhaustive_structs)]
483#[derive(Debug, Clone, Copy)]
484pub struct ElementAttributesSchemes<'a> {
485    /// The name of the element.
486    pub element: &'static str,
487    /// The list of allowed URI schemes per attribute name.
488    ///
489    /// The value of the `parent` is the attribute name and the properties are schemes.
490    pub attr_schemes: &'a [PropertiesNames<'a>],
491}
492
493impl ElementAttributesSchemes<'_> {
494    fn to_tuple(self) -> (&'static str, HashMap<&'static str, HashSet<&'static str>>) {
495        let map = self.attr_schemes.iter().map(|s| s.to_tuple()).collect();
496        (self.element, map)
497    }
498}