// ruma_common/http_headers/rfc8187.rs
//! Encoding and decoding functions according to [RFC 8187].
//!
//! [RFC 8187]: https://datatracker.ietf.org/doc/html/rfc8187

use std::borrow::Cow;

use percent_encoding::{AsciiSet, NON_ALPHANUMERIC};

/// The set of ASCII bytes that must be percent-encoded in an RFC 8187 extended value.
///
/// This is the complement of the `attr-char` production of RFC 8187: it starts from
/// `NON_ALPHANUMERIC` and removes the punctuation that `attr-char` allows to appear
/// unescaped, namely ``! # $ & + - . ^ _ ` | ~``. Every byte left in the set gets escaped
/// when the set is used for encoding.
const ATTR_CHAR: AsciiSet = NON_ALPHANUMERIC
    .remove(b'!')
    .remove(b'#')
    .remove(b'$')
    .remove(b'&')
    .remove(b'+')
    .remove(b'-')
    .remove(b'.')
    .remove(b'^')
    .remove(b'_')
    .remove(b'`')
    .remove(b'|')
    .remove(b'~');

/// Produce the [RFC 8187] extended-value form of `s`.
///
/// The output always starts with `utf-8''` (UTF-8 charset, empty language tag), followed by
/// `s` with every byte in [`ATTR_CHAR`] percent-encoded.
///
/// [RFC 8187]: https://datatracker.ietf.org/doc/html/rfc8187
pub(super) fn encode(s: &str) -> String {
    // Charset, then an empty language tag between the two single quotes.
    const PREFIX: &str = "utf-8''";

    // The encoded form is at least as long as the prefix plus the raw input.
    let mut output = String::with_capacity(PREFIX.len() + s.len());
    output.push_str(PREFIX);
    // The percent-encoder yields the result as a sequence of string slices.
    output.extend(percent_encoding::utf8_percent_encode(s, &ATTR_CHAR));
    output
}

/// Decode an [RFC 8187] extended value of the form `charset'language'value`.
///
/// The language part is accepted but ignored. Only the UTF-8 character set is supported
/// (matched ASCII case-insensitively); the value part is percent-decoded and converted to a
/// string lossily.
///
/// # Errors
///
/// * [`Rfc8187DecodeError::Empty`] if `bytes` is empty.
/// * [`Rfc8187DecodeError::WrongPartsCount`] if `bytes` does not split into exactly three
///   parts on `'`.
/// * [`Rfc8187DecodeError::NotUtf8`] if the charset part is not `utf-8`.
///
/// [RFC 8187]: https://datatracker.ietf.org/doc/html/rfc8187
pub(super) fn decode(bytes: &[u8]) -> Result<Cow<'_, str>, Rfc8187DecodeError> {
    if bytes.is_empty() {
        return Err(Rfc8187DecodeError::Empty);
    }

    // Exactly three quote-separated fields are expected; a fourth `next()` must yield nothing.
    let mut fields = bytes.split(|&byte| byte == b'\'');
    let (charset, value) = match (fields.next(), fields.next(), fields.next(), fields.next()) {
        (Some(charset), Some(_language), Some(value), None) => (charset, value),
        _ => return Err(Rfc8187DecodeError::WrongPartsCount),
    };

    if charset.eq_ignore_ascii_case(b"utf-8") {
        // Lossy on purpose: invalid UTF-8 sequences are replaced instead of rejected, for
        // maximum compatibility.
        Ok(percent_encoding::percent_decode(value).decode_utf8_lossy())
    } else {
        Err(Rfc8187DecodeError::NotUtf8)
    }
}

/// All errors encountered when trying to decode a string according to [RFC 8187].
///
/// These are the failure modes of [`decode`], which expects an input of the form
/// `charset'language'value`.
///
/// [RFC 8187]: https://datatracker.ietf.org/doc/html/rfc8187
#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
#[non_exhaustive]
pub(super) enum Rfc8187DecodeError {
    /// The input contains no bytes at all.
    #[error("string is empty")]
    Empty,

    /// The input does not split into exactly three parts (charset, language and value) on the
    /// `'` separator.
    #[error("string does not contain the right number of parts")]
    WrongPartsCount,

    /// The charset part is not `utf-8` (compared ASCII case-insensitively).
    #[error("character set is not UTF-8")]
    NotUtf8,
}