ruma_common/http_headers/
rfc8187.rs

1//! Encoding and decoding functions according to [RFC 8187].
2//!
3//! [RFC 8187]: https://datatracker.ietf.org/doc/html/rfc8187
4
5use std::borrow::Cow;
6
7use percent_encoding::{AsciiSet, NON_ALPHANUMERIC};
8
9/// The characters to percent-encode according to the `attr-char` set.
10const ATTR_CHAR: AsciiSet = NON_ALPHANUMERIC
11    .remove(b'!')
12    .remove(b'#')
13    .remove(b'$')
14    .remove(b'&')
15    .remove(b'+')
16    .remove(b'-')
17    .remove(b'.')
18    .remove(b'^')
19    .remove(b'_')
20    .remove(b'`')
21    .remove(b'|')
22    .remove(b'~');
23
24/// Encode the given string according to [RFC 8187].
25///
26/// [RFC 8187]: https://datatracker.ietf.org/doc/html/rfc8187
27pub(super) fn encode(s: &str) -> String {
28    let encoded = percent_encoding::utf8_percent_encode(s, &ATTR_CHAR);
29    format!("utf-8''{encoded}")
30}
31
32/// Decode the given bytes according to [RFC 8187].
33///
34/// Only the UTF-8 character set is supported, all other character sets return an error.
35///
36/// [RFC 8187]: https://datatracker.ietf.org/doc/html/rfc8187
37pub(super) fn decode(bytes: &[u8]) -> Result<Cow<'_, str>, Rfc8187DecodeError> {
38    if bytes.is_empty() {
39        return Err(Rfc8187DecodeError::Empty);
40    }
41
42    let mut parts = bytes.split(|b| *b == b'\'');
43    let charset = parts.next().ok_or(Rfc8187DecodeError::WrongPartsCount)?;
44    let _lang = parts.next().ok_or(Rfc8187DecodeError::WrongPartsCount)?;
45    let encoded = parts.next().ok_or(Rfc8187DecodeError::WrongPartsCount)?;
46
47    if parts.next().is_some() {
48        return Err(Rfc8187DecodeError::WrongPartsCount);
49    }
50
51    if !charset.eq_ignore_ascii_case(b"utf-8") {
52        return Err(Rfc8187DecodeError::NotUtf8);
53    }
54
55    // For maximum compatibility, do a lossy conversion.
56    Ok(percent_encoding::percent_decode(encoded).decode_utf8_lossy())
57}
58
59/// All errors encountered when trying to decode a string according to [RFC 8187].
60///
61/// [RFC 8187]: https://datatracker.ietf.org/doc/html/rfc8187
62#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
63#[non_exhaustive]
64pub(super) enum Rfc8187DecodeError {
65    /// The string is empty.
66    #[error("string is empty")]
67    Empty,
68
69    /// The string does not contain the right number of parts.
70    #[error("string does not contain the right number of parts")]
71    WrongPartsCount,
72
73    /// The character set is not UTF-8.
74    #[error("character set is not UTF-8")]
75    NotUtf8,
76}