// ruma_html/sanitizer_config/clean.rs
use html5ever::{tendril::StrTendril, Attribute, LocalName};
use phf::{phf_map, phf_set, Map, Set};
use wildmatch::WildMatch;
use crate::{ElementData, Html, HtmlSanitizerMode, NodeData, NodeRef, SanitizerConfig};
/// Element names accepted by the built-in rules when a sanitizer mode is set.
///
/// An element that is neither in this set nor in a configured `allow_elements` list is
/// ignored: the element itself is dropped while its children are kept. The set is not
/// consulted when the configured `allow_elements` list is an override.
static ALLOWED_ELEMENTS_STRICT: Set<&str> = phf_set! {
"del", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote", "p", "a",
"ul", "ol", "sup", "sub", "li", "b", "i", "u", "strong", "em", "s",
"code", "hr", "br", "div", "table", "thead", "tbody", "tr", "th", "td",
"caption", "pre", "span", "img", "details", "summary", "mx-reply",
};
/// Name of the element that is removed, together with its children, when
/// `remove_reply_fallback` is enabled on the config.
const RICH_REPLY_ELEMENT_NAME: &str = "mx-reply";
/// Built-in element renames applied when a sanitizer mode is set: the key is the deprecated
/// element name, the value is the name the element is renamed to.
static DEPRECATED_ELEMENTS: Map<&str, &str> = phf_map! {
"font" => "span",
"strike" => "s",
};
/// Built-in attribute allowlist, keyed by element name, used when a sanitizer mode is set.
///
/// Elements without an entry here have no built-in allowed attributes.
static ALLOWED_ATTRIBUTES_STRICT: Map<&str, &Set<&str>> = phf_map! {
"span" => &ALLOWED_ATTRIBUTES_SPAN_STRICT,
"a" => &ALLOWED_ATTRIBUTES_A_STRICT,
"img" => &ALLOWED_ATTRIBUTES_IMG_STRICT,
"ol" => &ALLOWED_ATTRIBUTES_OL_STRICT,
"code" => &ALLOWED_ATTRIBUTES_CODE_STRICT,
"div" => &ALLOWED_ATTRIBUTES_DIV_STRICT,
};
/// Attributes allowed on `span` by the built-in rules.
static ALLOWED_ATTRIBUTES_SPAN_STRICT: Set<&str> =
phf_set! { "data-mx-bg-color", "data-mx-color", "data-mx-spoiler", "data-mx-maths" };
/// Attributes allowed on `a` by the built-in rules.
static ALLOWED_ATTRIBUTES_A_STRICT: Set<&str> = phf_set! { "target", "href" };
/// Attributes allowed on `img` by the built-in rules.
static ALLOWED_ATTRIBUTES_IMG_STRICT: Set<&str> =
phf_set! { "width", "height", "alt", "title", "src" };
/// Attributes allowed on `ol` by the built-in rules.
static ALLOWED_ATTRIBUTES_OL_STRICT: Set<&str> = phf_set! { "start" };
/// Attributes allowed on `code` by the built-in rules.
static ALLOWED_ATTRIBUTES_CODE_STRICT: Set<&str> = phf_set! { "class" };
/// Attributes allowed on `div` by the built-in rules.
static ALLOWED_ATTRIBUTES_DIV_STRICT: Set<&str> = phf_set! { "data-mx-maths" };
/// Built-in attribute renames, keyed by element name, applied when a sanitizer mode is set.
///
/// The lookup uses the element's name before any element rename is applied.
static DEPRECATED_ATTRS: Map<&str, &Map<&str, &str>> = phf_map! {
"font" => &DEPRECATED_ATTRIBUTES_FONT,
};
/// Attribute renames for `font`: deprecated attribute name to replacement name.
static DEPRECATED_ATTRIBUTES_FONT: Map<&str, &str> = phf_map! { "color" => "data-mx-color" };
/// Built-in URI scheme allowlist used when a sanitizer mode is set.
///
/// Keyed first by element name, then by attribute name; the innermost set holds the
/// schemes (without the trailing `:`) that the attribute value may start with.
static ALLOWED_SCHEMES_STRICT: Map<&str, &Map<&str, &Set<&str>>> = phf_map! {
"a" => &ALLOWED_SCHEMES_A_STRICT,
"img" => &ALLOWED_SCHEMES_IMG_STRICT,
};
/// Scheme restrictions for attributes of `a` in the built-in rules.
static ALLOWED_SCHEMES_A_STRICT: Map<&str, &Set<&str>> = phf_map! {
"href" => &ALLOWED_SCHEMES_A_HREF_STRICT,
};
/// Schemes accepted for `a[href]` by the built-in rules.
pub(crate) static ALLOWED_SCHEMES_A_HREF_STRICT: Set<&str> =
phf_set! { "http", "https", "ftp", "mailto", "magnet" };
/// Scheme restrictions for attributes of `img` in the built-in rules.
static ALLOWED_SCHEMES_IMG_STRICT: Map<&str, &Set<&str>> = phf_map! {
"src" => &ALLOWED_SCHEMES_IMG_SRC_STRICT,
};
/// Schemes accepted for `img[src]` by the built-in rules.
static ALLOWED_SCHEMES_IMG_SRC_STRICT: Set<&str> = phf_set! { "mxc" };
/// Extra scheme allowlist that is consulted, in addition to the strict one, only when the
/// mode is `HtmlSanitizerMode::Compat`. Same layout as `ALLOWED_SCHEMES_STRICT`.
static ALLOWED_SCHEMES_COMPAT: Map<&str, &Map<&str, &Set<&str>>> = phf_map! {
"a" => &ALLOWED_SCHEMES_A_COMPAT,
};
/// Extra scheme restrictions for attributes of `a` in compat mode.
static ALLOWED_SCHEMES_A_COMPAT: Map<&str, &Set<&str>> = phf_map! {
"href" => &ALLOWED_SCHEMES_A_HREF_COMPAT,
};
/// Extra schemes accepted for `a[href]` in compat mode.
pub(crate) static ALLOWED_SCHEMES_A_HREF_COMPAT: Set<&str> = phf_set! { "matrix" };
/// Built-in class allowlist, keyed by element name, used when a sanitizer mode is set.
///
/// Each entry is a wildcard pattern that is evaluated with `WildMatch`.
static ALLOWED_CLASSES_STRICT: Map<&str, &Set<&str>> =
phf_map! { "code" => &ALLOWED_CLASSES_CODE_STRICT };
/// Class patterns allowed on `code` by the built-in rules.
static ALLOWED_CLASSES_CODE_STRICT: Set<&str> = phf_set! { "language-*" };
/// Depth limit used when a sanitizer mode is set and the config has no explicit `max_depth`.
///
/// Elements whose depth is greater than or equal to the limit are removed.
const MAX_DEPTH_STRICT: u32 = 100;
impl SanitizerConfig {
/// Whether the built-in strict rules apply, i.e. whether any sanitizer mode is set.
///
/// Every mode enables these rules; compat mode only extends them.
fn use_strict(&self) -> bool {
self.mode.is_some()
}
/// Whether the additional compat rules apply, i.e. whether the configured mode is
/// `HtmlSanitizerMode::Compat`.
fn use_compat(&self) -> bool {
    matches!(self.mode, Some(HtmlSanitizerMode::Compat))
}
/// The depth limit to enforce, if any.
///
/// An explicitly configured `max_depth` always wins. Without one, `MAX_DEPTH_STRICT` is
/// used when a sanitizer mode is set, and no limit applies otherwise.
fn max_depth_value(&self) -> Option<u32> {
    match self.max_depth {
        Some(limit) => Some(limit),
        None if self.use_strict() => Some(MAX_DEPTH_STRICT),
        None => None,
    }
}
/// Sanitize `html` in place according to this config.
///
/// Every child of the document is cleaned recursively, starting at depth 0.
pub(crate) fn clean(&self, html: &Html) {
    html.children().into_iter().for_each(|top_level| self.clean_node(top_level, 0));
}
/// Sanitize `node`, located at `depth`, and then its descendants.
///
/// Replacements are applied first, then the node's fate is decided:
///
/// * `Remove`: the node is detached and its children are not visited.
/// * `Ignore`: each child is moved in front of the node and cleaned, then the node itself
///   is detached.
/// * `None`: the children are cleaned in place, then the node's attributes are cleaned if
///   it is an element.
fn clean_node(&self, node: NodeRef, depth: u32) {
    let node = self.apply_replacements(node);

    match self.node_action(&node, depth) {
        NodeAction::Remove => {
            node.detach();
        }
        NodeAction::Ignore => {
            for child in node.children() {
                // Hoist the child next to the ignored element before cleaning it, so
                // that it survives when the element is detached below.
                child.insert_before_sibling(&node);
                self.clean_node(child, depth + 1);
            }
            node.detach();
        }
        NodeAction::None => {
            for child in node.children() {
                self.clean_node(child, depth + 1);
            }
            if let Some(element) = node.as_element() {
                self.clean_element_attributes(element);
            }
        }
    }
}
/// Apply attribute and element renames to `node` and return the resulting node.
///
/// Attribute renames are looked up under the element's current name, before the element
/// itself is renamed. For both attributes and elements, an entry in the configured list
/// takes precedence over the built-in deprecated tables; the built-in tables are only
/// consulted when a sanitizer mode is set and the configured list is not an override.
///
/// Non-element nodes are returned unchanged.
fn apply_replacements(&self, node: NodeRef) -> NodeRef {
    let new_element_name = match node.data() {
        NodeData::Element(ElementData { name, attrs, .. }) => {
            let tag = name.local.as_ref();

            // Attribute renames.
            let custom_attr_renames =
                self.replace_attrs.as_ref().and_then(|list| list.content.get(tag));
            let custom_attrs_override =
                self.replace_attrs.as_ref().is_some_and(|list| list.is_override());
            let builtin_attr_renames = if !custom_attrs_override && self.use_strict() {
                DEPRECATED_ATTRS.get(tag)
            } else {
                None
            };

            if custom_attr_renames.is_some() || builtin_attr_renames.is_some() {
                let mut attrs = attrs.borrow_mut();
                // The whole collection is rebuilt because renaming changes each
                // attribute's identity.
                let renamed = attrs
                    .iter()
                    .cloned()
                    .map(|mut attr| {
                        let current = attr.name.local.as_ref();
                        let new_name = custom_attr_renames
                            .and_then(|renames| renames.get(current))
                            .or_else(|| {
                                builtin_attr_renames.and_then(|renames| renames.get(current))
                            })
                            .copied();

                        if let Some(new_name) = new_name {
                            attr.name.local = LocalName::from(new_name);
                        }
                        attr
                    })
                    .collect();
                *attrs = renamed;
            }

            // Element rename: configured list first, built-in table as a fallback.
            let custom_element_rename = self
                .replace_elements
                .as_ref()
                .and_then(|list| list.content.get(tag))
                .copied();

            custom_element_rename.or_else(|| {
                let custom_elements_override =
                    self.replace_elements.as_ref().is_some_and(|list| list.is_override());

                if !custom_elements_override && self.use_strict() {
                    DEPRECATED_ELEMENTS.get(tag).copied()
                } else {
                    None
                }
            })
        }
        _ => None,
    };

    match new_element_name {
        Some(new_name) => node.replace_with_element_name(LocalName::from(new_name)),
        None => node,
    }
}
/// Decide what should happen to `node`, which is located at `depth`.
///
/// Text nodes are always kept and any other non-element node is removed. For elements the
/// checks run in this order, and the first one that matches wins:
///
/// 1. `Remove` if the element is listed in `remove_elements`, is the rich reply fallback
///    while `remove_reply_fallback` is set, or sits at or beyond the maximum depth.
/// 2. `Ignore` if the element is listed in `ignore_elements`.
/// 3. `Ignore` if an element allowlist applies and the element is not part of it.
/// 4. `Ignore` if one of its attributes starts with a scheme from `deny_schemes`.
/// 5. `Ignore` if a scheme allowlist applies to one of its attributes and the attribute's
///    value does not start with any allowed scheme.
///
/// Otherwise the element is kept (`NodeAction::None`).
///
/// Every scheme-restricted attribute of the element is validated. Attributes that have no
/// scheme restriction are skipped and do not end the check for the remaining attributes.
fn node_action(&self, node: &NodeRef, depth: u32) -> NodeAction {
    match node.data() {
        NodeData::Element(ElementData { name, attrs, .. }) => {
            let element_name = name.local.as_ref();
            let attrs = attrs.borrow();

            // Unconditional removals: the element and its content are dropped.
            if self.remove_elements.as_ref().is_some_and(|set| set.contains(element_name)) {
                return NodeAction::Remove;
            }
            if self.remove_reply_fallback && element_name == RICH_REPLY_ELEMENT_NAME {
                return NodeAction::Remove;
            }
            if self.max_depth_value().is_some_and(|max| depth >= max) {
                return NodeAction::Remove;
            }

            // Explicitly ignored elements: the element is dropped, its children are kept.
            if self.ignore_elements.as_ref().is_some_and(|set| set.contains(element_name)) {
                return NodeAction::Ignore;
            }

            // Element allowlist: configured list plus, unless overridden, the built-in one.
            if self.allow_elements.is_some() || self.use_strict() {
                let list_allowed = self
                    .allow_elements
                    .as_ref()
                    .is_some_and(|list| list.content.contains(element_name));
                let list_is_override = self
                    .allow_elements
                    .as_ref()
                    .map(|list| list.is_override())
                    .unwrap_or_default();
                let mode_allowed = !list_is_override
                    && self.use_strict()
                    && ALLOWED_ELEMENTS_STRICT.contains(element_name);

                if !list_allowed && !mode_allowed {
                    return NodeAction::Ignore;
                }
            }

            // Scheme denylist.
            // NOTE(review): the prefix comparison is case-sensitive, so `SCHEME:` does not
            // match a denied `scheme` — confirm whether that is intended.
            if let Some(deny_schemes) =
                self.deny_schemes.as_ref().and_then(|map| map.get(element_name))
            {
                for attr in attrs.iter() {
                    let value = &attr.value;
                    let attr_name = attr.name.local.as_ref();

                    if let Some(schemes) = deny_schemes.get(attr_name) {
                        if schemes.iter().any(|scheme| value.starts_with(&format!("{scheme}:")))
                        {
                            return NodeAction::Ignore;
                        }
                    }
                }
            }

            // Scheme allowlist: nothing to check when no allowlist is in effect at all.
            if self.allow_schemes.is_none() && !self.use_strict() {
                return NodeAction::None;
            }

            let list_element_schemes =
                self.allow_schemes.as_ref().and_then(|list| list.content.get(element_name));
            let list_is_override =
                self.allow_schemes.as_ref().map(|list| list.is_override()).unwrap_or_default();
            let strict_mode_element_schemes = (!list_is_override && self.use_strict())
                .then(|| ALLOWED_SCHEMES_STRICT.get(element_name))
                .flatten();
            let compat_mode_element_schemes = (!list_is_override && self.use_compat())
                .then(|| ALLOWED_SCHEMES_COMPAT.get(element_name))
                .flatten();

            // No scheme restriction applies to any attribute of this element.
            if list_element_schemes.is_none()
                && strict_mode_element_schemes.is_none()
                && compat_mode_element_schemes.is_none()
            {
                return NodeAction::None;
            }

            for attr in attrs.iter() {
                let value = &attr.value;
                let attr_name = attr.name.local.as_ref();

                let list_attr_schemes = list_element_schemes.and_then(|map| map.get(attr_name));
                let strict_mode_attr_schemes =
                    strict_mode_element_schemes.and_then(|map| map.get(attr_name));
                let compat_mode_attr_schemes =
                    compat_mode_element_schemes.and_then(|map| map.get(attr_name));

                if list_attr_schemes.is_none()
                    && strict_mode_attr_schemes.is_none()
                    && compat_mode_attr_schemes.is_none()
                {
                    // This attribute is unrestricted. Move on to the next one instead of
                    // accepting the whole element: returning here would leave every
                    // restricted attribute that comes later in the iteration unchecked.
                    continue;
                }

                let mut allowed_schemes = list_attr_schemes
                    .into_iter()
                    .flatten()
                    .chain(strict_mode_attr_schemes.map(|set| set.iter()).into_iter().flatten())
                    .chain(
                        compat_mode_attr_schemes.map(|set| set.iter()).into_iter().flatten(),
                    );

                if !allowed_schemes.any(|scheme| value.starts_with(&format!("{scheme}:"))) {
                    return NodeAction::Ignore;
                }
            }

            NodeAction::None
        }
        NodeData::Text(_) => NodeAction::None,
        _ => NodeAction::Remove,
    }
}
fn clean_element_attributes(&self, data: &ElementData) {
let ElementData { name, attrs } = data;
let element_name = name.local.as_ref();
let mut attrs = attrs.borrow_mut();
let list_remove_attrs = self.remove_attrs.as_ref().and_then(|map| map.get(element_name));
let whitelist_attrs = self.allow_attrs.is_some() || self.use_strict();
let list_allow_attrs =
self.allow_attrs.as_ref().and_then(|list| list.content.get(element_name));
let list_is_override =
self.allow_attrs.as_ref().map(|list| list.is_override()).unwrap_or_default();
let mode_allow_attrs = (!list_is_override && self.use_strict())
.then(|| ALLOWED_ATTRIBUTES_STRICT.get(element_name))
.flatten();
let list_remove_classes =
self.remove_classes.as_ref().and_then(|map| map.get(element_name));
let whitelist_classes = self.allow_classes.is_some() || self.use_strict();
let list_allow_classes =
self.allow_classes.as_ref().and_then(|list| list.content.get(element_name));
let list_is_override =
self.allow_classes.as_ref().map(|list| list.is_override()).unwrap_or_default();
let mode_allow_classes = (!list_is_override && self.use_strict())
.then(|| ALLOWED_CLASSES_STRICT.get(element_name))
.flatten();
let actions: Vec<_> = attrs
.iter()
.filter_map(|attr| {
let value = &attr.value;
let attr_name = attr.name.local.as_ref();
if list_remove_attrs.is_some_and(|set| set.contains(attr_name)) {
return Some(AttributeAction::Remove(attr.to_owned()));
}
if whitelist_attrs {
let list_allowed = list_allow_attrs.is_some_and(|set| set.contains(attr_name));
let mode_allowed = mode_allow_attrs.is_some_and(|set| set.contains(attr_name));
if !list_allowed && !mode_allowed {
return Some(AttributeAction::Remove(attr.to_owned()));
}
}
if attr_name == "class" {
let mut classes = value.split_whitespace().collect::<Vec<_>>();
let initial_len = classes.len();
if let Some(remove_classes) = list_remove_classes {
classes.retain(|class| {
for remove_class in remove_classes {
if WildMatch::new(remove_class).matches(class) {
return false;
}
}
true
});
}
if whitelist_classes {
classes.retain(|class| {
let allow_classes = list_allow_classes
.map(|set| set.iter())
.into_iter()
.flatten()
.chain(
mode_allow_classes.map(|set| set.iter()).into_iter().flatten(),
);
for allow_class in allow_classes {
if WildMatch::new(allow_class).matches(class) {
return true;
}
}
false
});
}
if classes.len() == initial_len {
return None;
}
if classes.is_empty() {
return Some(AttributeAction::Remove(attr.to_owned()));
} else {
let new_class = classes.join(" ");
return Some(AttributeAction::ReplaceValue(
attr.to_owned(),
new_class.into(),
));
}
}
None
})
.collect();
for action in actions {
match action {
AttributeAction::ReplaceValue(attr, value) => {
if let Some(mut attr) = attrs.take(&attr) {
attr.value = value;
attrs.insert(attr);
}
}
AttributeAction::Remove(attr) => {
attrs.remove(&attr);
}
}
}
}
}
/// What the sanitizer does with a node after inspecting it.
#[derive(Debug, PartialEq, Eq)]
enum NodeAction {
/// Keep the node; its children are cleaned and, for an element, so are its attributes.
None,
/// Drop the node but keep its children, which are moved in front of it and cleaned.
Ignore,
/// Drop the node together with its children, which are not visited.
Remove,
}
/// A pending change to one attribute of an element, collected while scanning the
/// attributes and applied afterwards.
#[derive(Debug)]
enum AttributeAction {
/// Replace the value of the given attribute with the given new value.
ReplaceValue(Attribute, StrTendril),
/// Remove the given attribute.
Remove(Attribute),
}