1//! Encoding and decoding functions according to [RFC 8187].
2//!
3//! [RFC 8187]: https://datatracker.ietf.org/doc/html/rfc8187
45use std::borrow::Cow;
67use percent_encoding::{AsciiSet, NON_ALPHANUMERIC};
89/// The characters to percent-encode according to the `attr-char` set.
10const ATTR_CHAR: AsciiSet = NON_ALPHANUMERIC
11 .remove(b'!')
12 .remove(b'#')
13 .remove(b'$')
14 .remove(b'&')
15 .remove(b'+')
16 .remove(b'-')
17 .remove(b'.')
18 .remove(b'^')
19 .remove(b'_')
20 .remove(b'`')
21 .remove(b'|')
22 .remove(b'~');
2324/// Encode the given string according to [RFC 8187].
25///
26/// [RFC 8187]: https://datatracker.ietf.org/doc/html/rfc8187
27pub(super) fn encode(s: &str) -> String {
28let encoded = percent_encoding::utf8_percent_encode(s, &ATTR_CHAR);
29format!("utf-8''{encoded}")
30}
3132/// Decode the given bytes according to [RFC 8187].
33///
34/// Only the UTF-8 character set is supported, all other character sets return an error.
35///
36/// [RFC 8187]: https://datatracker.ietf.org/doc/html/rfc8187
37pub(super) fn decode(bytes: &[u8]) -> Result<Cow<'_, str>, Rfc8187DecodeError> {
38if bytes.is_empty() {
39return Err(Rfc8187DecodeError::Empty);
40 }
4142let mut parts = bytes.split(|b| *b == b'\'');
43let charset = parts.next().ok_or(Rfc8187DecodeError::WrongPartsCount)?;
44let _lang = parts.next().ok_or(Rfc8187DecodeError::WrongPartsCount)?;
45let encoded = parts.next().ok_or(Rfc8187DecodeError::WrongPartsCount)?;
4647if parts.next().is_some() {
48return Err(Rfc8187DecodeError::WrongPartsCount);
49 }
5051if !charset.eq_ignore_ascii_case(b"utf-8") {
52return Err(Rfc8187DecodeError::NotUtf8);
53 }
5455// For maximum compatibility, do a lossy conversion.
56Ok(percent_encoding::percent_decode(encoded).decode_utf8_lossy())
57}
5859/// All errors encountered when trying to decode a string according to [RFC 8187].
60///
61/// [RFC 8187]: https://datatracker.ietf.org/doc/html/rfc8187
62#[derive(Debug, Clone, Copy, PartialEq, Eq, thiserror::Error)]
63#[non_exhaustive]
64pub(super) enum Rfc8187DecodeError {
65/// The string is empty.
66#[error("string is empty")]
67Empty,
6869/// The string does not contain the right number of parts.
70#[error("string does not contain the right number of parts")]
71WrongPartsCount,
7273/// The character set is not UTF-8.
74#[error("character set is not UTF-8")]
75NotUtf8,
76}