ruma_federation_api/
authenticated_media.rs

1//! Authenticated endpoints for the content repository, according to [MSC3916].
2//!
3//! [MSC3916]: https://github.com/matrix-org/matrix-spec-proposals/pull/3916
4
5use ruma_common::http_headers::ContentDisposition;
6use serde::{Deserialize, Serialize};
7
8pub mod get_content;
9pub mod get_content_thumbnail;
10
/// The `multipart/mixed` mime "essence", i.e. the type and subtype without any parameter.
const MULTIPART_MIXED: &str = "multipart/mixed";
/// The maximum number of headers to parse in a body part.
///
/// Only needed when deserializing a response.
#[cfg(feature = "client")]
const MAX_HEADERS_COUNT: usize = 32;
/// The length, in characters, of the boundary generated when serializing a response.
#[cfg(feature = "server")]
const GENERATED_BOUNDARY_LENGTH: usize = 30;
19
/// The metadata of a file from the content repository.
///
/// This type currently has no fields, so it serializes to an empty JSON object.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[cfg_attr(not(ruma_unstable_exhaustive_types), non_exhaustive)]
pub struct ContentMetadata {}
24
25impl ContentMetadata {
26    /// Creates a new empty `ContentMetadata`.
27    pub fn new() -> Self {
28        Self {}
29    }
30}
31
/// A file from the content repository or the location where it can be found.
///
/// This is the second part of the `multipart/mixed` body exchanged between servers, after the
/// metadata.
#[derive(Debug, Clone)]
#[cfg_attr(not(ruma_unstable_exhaustive_types), non_exhaustive)]
pub enum FileOrLocation {
    /// The content of the file.
    File(Content),

    /// The file is at the given URL.
    ///
    /// It is transmitted as the value of a `Location` header in the body part.
    Location(String),
}
42
/// The content of a file from the content repository.
#[derive(Debug, Clone)]
#[cfg_attr(not(ruma_unstable_exhaustive_types), non_exhaustive)]
pub struct Content {
    /// The content of the file as bytes.
    pub file: Vec<u8>,

    /// The content type of the file that was previously uploaded.
    ///
    /// When this is `None`, `application/octet-stream` is used as the content type of the
    /// serialized body part.
    pub content_type: Option<String>,

    /// The value of the `Content-Disposition` HTTP header, possibly containing the name of the
    /// file that was previously uploaded.
    ///
    /// When this is `None`, no `Content-Disposition` header is written in the serialized body
    /// part.
    pub content_disposition: Option<ContentDisposition>,
}
57
58impl Content {
59    /// Creates a new `Content` with the given bytes.
60    pub fn new(
61        file: Vec<u8>,
62        content_type: String,
63        content_disposition: ContentDisposition,
64    ) -> Self {
65        Self {
66            file,
67            content_type: Some(content_type),
68            content_disposition: Some(content_disposition),
69        }
70    }
71}
72
/// Build a `http::Response` with a `multipart/mixed` body out of the given metadata and content.
///
/// The body has two parts separated by a randomly generated boundary made of
/// `GENERATED_BOUNDARY_LENGTH` alphanumeric characters:
///
/// 1. The metadata serialized as JSON, with an `application/json` content type.
/// 2. Either the bytes of the file preceded by its `Content-Type` header (defaulting to
///    `application/octet-stream`) and its optional `Content-Disposition` header, or only a
///    `Location` header with the URL of the file.
///
/// The boundary is advertised in the `Content-Type` header of the returned response.
///
/// Returns an error if the metadata cannot be serialized or if the response cannot be built.
#[cfg(feature = "server")]
fn try_into_multipart_mixed_response<T: Default + bytes::BufMut>(
    metadata: &ContentMetadata,
    content: &FileOrLocation,
) -> Result<http::Response<T>, ruma_common::api::error::IntoHttpError> {
    use std::io::Write as _;

    use rand::Rng as _;

    let boundary: String = rand::thread_rng()
        .sample_iter(&rand::distributions::Alphanumeric)
        .take(GENERATED_BOUNDARY_LENGTH)
        .map(char::from)
        .collect();

    // The results of the writes into the body are discarded.
    let mut out = T::default().writer();

    // First part: opening boundary, content type header and empty line, then the JSON metadata.
    let _ = write!(
        out,
        "\r\n--{boundary}\r\n{name}: {value}\r\n\r\n",
        name = http::header::CONTENT_TYPE,
        value = mime::APPLICATION_JSON
    );
    serde_json::to_writer(&mut out, metadata)?;

    // Second part: boundary, then either the location or the file.
    let _ = write!(out, "\r\n--{boundary}\r\n");

    match content {
        FileOrLocation::Location(url) => {
            // A location only needs its header followed by the empty line ending the headers.
            let _ = write!(out, "{}: {url}\r\n\r\n", http::header::LOCATION);
        }
        FileOrLocation::File(file_content) => {
            let media_type = match &file_content.content_type {
                Some(media_type) => media_type.as_str(),
                None => mime::APPLICATION_OCTET_STREAM.as_ref(),
            };
            let _ = write!(out, "{}: {media_type}\r\n", http::header::CONTENT_TYPE);

            if let Some(disposition) = file_content.content_disposition.as_ref() {
                let _ = write!(out, "{}: {disposition}\r\n", http::header::CONTENT_DISPOSITION);
            }

            // The empty line ends the headers, the bytes of the file follow.
            let _ = out.write_all(b"\r\n");
            let _ = out.write_all(&file_content.file);
        }
    }

    // Closing boundary.
    let _ = write!(out, "\r\n--{boundary}--");

    let response = http::Response::builder()
        .header(http::header::CONTENT_TYPE, format!("{MULTIPART_MIXED}; boundary={boundary}"))
        .body(out.into_inner())?;

    Ok(response)
}
143
/// Extract the metadata and the media content from a `http::Response` with a `multipart/mixed`
/// body.
///
/// The boundary is read from the `Content-Type` header of the response. The body must contain two
/// parts delimited by that boundary: the JSON-serialized metadata, then either the file with its
/// optional `Content-Type` and `Content-Disposition` headers, or a `Location` header with the URL
/// of the file.
#[cfg(feature = "client")]
fn try_from_multipart_mixed_response<T: AsRef<[u8]>>(
    http_response: http::Response<T>,
) -> Result<
    (ContentMetadata, FileOrLocation),
    ruma_common::api::error::FromHttpResponseError<ruma_common::api::error::MatrixError>,
> {
    use ruma_common::api::error::{HeaderDeserializationError, MultipartMixedDeserializationError};

    // The boundary is a parameter of the content type of the response.
    let raw_content_type = http_response
        .headers()
        .get(http::header::CONTENT_TYPE)
        .ok_or_else(|| HeaderDeserializationError::MissingHeader("Content-Type".to_owned()))?
        .to_str()?;
    let mime_type = raw_content_type
        .parse::<mime::Mime>()
        .map_err(|e| HeaderDeserializationError::InvalidHeader(e.into()))?;

    let essence = mime_type.essence_str();
    if !essence.eq_ignore_ascii_case(MULTIPART_MIXED) {
        return Err(HeaderDeserializationError::InvalidHeaderValue {
            header: "Content-Type".to_owned(),
            expected: MULTIPART_MIXED.to_owned(),
            unexpected: essence.to_owned(),
        }
        .into());
    }

    let Some(boundary_param) = mime_type.get_param("boundary") else {
        return Err(HeaderDeserializationError::MissingMultipartBoundary.into());
    };
    let boundary = boundary_param.as_str().as_bytes();

    // Locate the parts of the body by searching for the delimiter, which is the boundary preceded
    // by a CRLF and two hyphens.
    let body = http_response.body().as_ref();

    let delimiter = [b"\r\n--".as_slice(), boundary].concat();
    let delimiter_without_crlf = &delimiter[2..];

    let mut delimiter_positions = memchr::memmem::find_iter(body, &delimiter);

    let no_part_found =
        || MultipartMixedDeserializationError::MissingBodyParts { expected: 2, found: 0 };

    let metadata_start = if body.starts_with(delimiter_without_crlf) {
        // Without a preamble, the first boundary is allowed to omit the preceding CRLF.
        delimiter_without_crlf.len()
    } else {
        delimiter_positions.next().ok_or_else(no_part_found)? + delimiter.len()
    };
    let metadata_end = delimiter_positions.next().ok_or_else(no_part_found)?;

    // The headers of the metadata part are ignored, its content is expected to be JSON.
    let (_metadata_headers, metadata_bytes) =
        parse_multipart_body_part(body, metadata_start, metadata_end)?;
    let metadata = serde_json::from_slice(metadata_bytes)?;

    // The second part holds the media content.
    let content_start = metadata_end + delimiter.len();
    let content_end = delimiter_positions.next().ok_or_else(|| {
        MultipartMixedDeserializationError::MissingBodyParts { expected: 2, found: 1 }
    })?;

    let (content_headers_bytes, file) = parse_multipart_body_part(body, content_start, content_end)?;

    // The headers tell whether this is a file or a location, and describe the file.
    let mut parsed_headers = [httparse::EMPTY_HEADER; MAX_HEADERS_COUNT];
    httparse::parse_headers(content_headers_bytes, &mut parsed_headers)
        .map_err(|e| MultipartMixedDeserializationError::InvalidHeader(e.into()))?;

    let mut location = None;
    let mut content_type = None;
    let mut content_disposition = None;

    // The first header with an empty name marks the end of the parsed headers.
    for header in parsed_headers.iter().take_while(|header| !header.name.is_empty()) {
        if header.name == http::header::LOCATION {
            let url = String::from_utf8(header.value.to_vec())
                .map_err(|e| MultipartMixedDeserializationError::InvalidHeader(e.into()))?;
            location = Some(url);

            // A location makes the other headers irrelevant, stop here.
            break;
        } else if header.name == http::header::CONTENT_TYPE {
            let value = String::from_utf8(header.value.to_vec())
                .map_err(|e| MultipartMixedDeserializationError::InvalidHeader(e.into()))?;
            content_type = Some(value);
        } else if header.name == http::header::CONTENT_DISPOSITION {
            let value = ContentDisposition::try_from(header.value)
                .map_err(|e| MultipartMixedDeserializationError::InvalidHeader(e.into()))?;
            content_disposition = Some(value);
        }
    }

    let content = match location {
        Some(url) => FileOrLocation::Location(url),
        None => FileOrLocation::File(Content {
            file: file.to_owned(),
            content_type,
            content_disposition,
        }),
    };

    Ok((metadata, content))
}
260
/// Parse the multipart body part in the given bytes, starting and ending at the given positions.
///
/// `start` is the position right after the boundary that opens the part and `end` is the position
/// where the delimiter that closes the part begins, so the closing delimiter is not part of
/// `bytes[start..end]`.
///
/// Returns a `(headers_bytes, content_bytes)` tuple.
///
/// # Errors
///
/// Returns `MissingBodyPartInnerSeparator` if there is no newline after the opening boundary, or
/// if the empty line separating the headers from the content could not be found.
///
/// # Panics
///
/// Panics if `start..end` is not a valid range in `bytes`.
#[cfg(feature = "client")]
fn parse_multipart_body_part(
    bytes: &[u8],
    start: usize,
    end: usize,
) -> Result<(&[u8], &[u8]), ruma_common::api::error::MultipartMixedDeserializationError> {
    use ruma_common::api::error::MultipartMixedDeserializationError;

    // The part should start with a newline after the boundary. We need to ignore characters before
    // it in case of extra whitespaces, and for compatibility it might not have a CR.
    //
    // The newline of the closing delimiter is outside of the searched range, so a malformed part
    // (e.g. two consecutive boundaries) might not contain any newline. The body comes from a
    // remote server, so this must be reported as an error rather than a panic.
    let headers_start = memchr::memchr(b'\n', &bytes[start..end])
        .ok_or(MultipartMixedDeserializationError::MissingBodyPartInnerSeparator)?
        + start
        + 1;

    // Let's find an empty line now.
    let mut line_start = headers_start;
    let mut line_end;

    loop {
        line_end = memchr::memchr(b'\n', &bytes[line_start..end])
            .ok_or(MultipartMixedDeserializationError::MissingBodyPartInnerSeparator)?
            + line_start
            + 1;

        // An empty line only contains its line ending, with or without the CR.
        if matches!(&bytes[line_start..line_end], b"\r\n" | b"\n") {
            break;
        }

        line_start = line_end;
    }

    // The headers stop before the empty line and the content starts right after it.
    Ok((&bytes[headers_start..line_start], &bytes[line_end..end]))
}
299
#[cfg(all(test, feature = "client", feature = "server"))]
mod tests {
    use assert_matches2::assert_matches;
    use ruma_common::http_headers::{ContentDisposition, ContentDispositionType};

    use super::{
        Content, ContentMetadata, FileOrLocation, try_from_multipart_mixed_response,
        try_into_multipart_mixed_response,
    };

    /// The `Content-Type` header value matching the boundary of the handwritten bodies.
    const ABCDEF_CONTENT_TYPE: &str = "multipart/mixed; boundary=abcdef";

    /// Build a response with the given `Content-Type` header value and body.
    fn response(content_type: &str, body: &'static str) -> http::Response<&'static str> {
        http::Response::builder().header(http::header::CONTENT_TYPE, content_type).body(body).unwrap()
    }

    /// Serialize the given content with empty metadata, then deserialize the resulting response.
    fn round_trip(outgoing_content: &FileOrLocation) -> FileOrLocation {
        let outgoing_metadata = ContentMetadata::new();

        let response =
            try_into_multipart_mixed_response::<Vec<u8>>(&outgoing_metadata, outgoing_content)
                .unwrap();

        let (_incoming_metadata, incoming_content) =
            try_from_multipart_mixed_response(response).unwrap();

        incoming_content
    }

    /// Check that a text file attached with the given filename survives a round trip.
    fn assert_file_round_trip(filename: &str) {
        let file = "s⌽me UTF-8 Ťext".as_bytes();
        let content_type = "text/plain";
        let content_disposition = ContentDisposition::new(ContentDispositionType::Attachment)
            .with_filename(Some(filename.to_owned()));

        let outgoing_content = FileOrLocation::File(Content {
            file: file.to_vec(),
            content_type: Some(content_type.to_owned()),
            content_disposition: Some(content_disposition.clone()),
        });

        let incoming_content = round_trip(&outgoing_content);

        assert_matches!(incoming_content, FileOrLocation::File(incoming_content));
        assert_eq!(incoming_content.file, file);
        assert_eq!(incoming_content.content_type.unwrap(), content_type);
        assert_eq!(incoming_content.content_disposition, Some(content_disposition));
    }

    /// Check that deserializing a response with the given content type and body fails.
    #[track_caller]
    fn assert_invalid(content_type: &str, body: &'static str) {
        try_from_multipart_mixed_response(response(content_type, body)).unwrap_err();
    }

    /// Deserialize a body using the `abcdef` boundary and return the file that it contains.
    #[track_caller]
    fn deserialize_file(body: &'static str) -> Content {
        let (_metadata, content) =
            try_from_multipart_mixed_response(response(ABCDEF_CONTENT_TYPE, body)).unwrap();

        assert_matches!(content, FileOrLocation::File(file_content));
        file_content
    }

    #[test]
    fn multipart_mixed_content_ascii_filename_conversions() {
        assert_file_round_trip("filename.txt");
    }

    #[test]
    fn multipart_mixed_content_utf8_filename_conversions() {
        assert_file_round_trip("fȈlƩnąmǝ.txt");
    }

    #[test]
    fn multipart_mixed_location_conversions() {
        let location = "https://server.local/media/filename.txt";

        let incoming_content = round_trip(&FileOrLocation::Location(location.to_owned()));

        assert_matches!(incoming_content, FileOrLocation::Location(incoming_location));
        assert_eq!(incoming_location, location);
    }

    #[test]
    fn multipart_mixed_deserialize_invalid() {
        // Missing boundary in headers.
        assert_invalid(
            "multipart/mixed",
            "\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\nContent-Type: text/plain\r\n\r\nsome plain text\r\n--abcdef--",
        );

        // Wrong boundary.
        assert_invalid(
            "multipart/mixed; boundary=012345",
            "\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\nContent-Type: text/plain\r\n\r\nsome plain text\r\n--abcdef--",
        );

        // Missing boundary in body.
        assert_invalid(
            ABCDEF_CONTENT_TYPE,
            "\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\nContent-Type: text/plain\r\n\r\nsome plain text",
        );

        // Missing header and content empty line separator in body part.
        assert_invalid(
            ABCDEF_CONTENT_TYPE,
            "\r\n--abcdef\r\n{}\r\n--abcdef\r\nContent-Type: text/plain\r\n\r\nsome plain text\r\n--abcdef--",
        );

        // Control character in header.
        assert_invalid(
            ABCDEF_CONTENT_TYPE,
            "\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\nContent-Type: text/plain\r\nContent-Disposition: inline; filename=\"my\nfile\"\r\nsome plain text\r\n--abcdef--",
        );

        // Boundary without CRLF with preamble.
        assert_invalid(
            ABCDEF_CONTENT_TYPE,
            "foo--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--",
        );
    }

    #[test]
    fn multipart_mixed_deserialize_valid() {
        // Simple.
        let file_content = deserialize_file(
            "\r\n--abcdef\r\ncontent-type: application/json\r\n\r\n{}\r\n--abcdef\r\ncontent-type: text/plain\r\n\r\nsome plain text\r\n--abcdef--",
        );
        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type.unwrap(), "text/plain");
        assert_eq!(file_content.content_disposition, None);

        // Case-insensitive headers.
        let file_content = deserialize_file(
            "\r\n--abcdef\r\nCONTENT-type: application/json\r\n\r\n{}\r\n--abcdef\r\nCONTENT-TYPE: text/plain\r\ncoNtenT-disPosItioN: attachment; filename=my_file.txt\r\n\r\nsome plain text\r\n--abcdef--",
        );
        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type.unwrap(), "text/plain");
        let content_disposition = file_content.content_disposition.unwrap();
        assert_eq!(content_disposition.disposition_type, ContentDispositionType::Attachment);
        assert_eq!(content_disposition.filename.unwrap(), "my_file.txt");

        // Extra whitespace.
        let file_content = deserialize_file(
            "   \r\n--abcdef\r\ncontent-type:   application/json   \r\n\r\n {} \r\n--abcdef\r\ncontent-type: text/plain  \r\n\r\nsome plain text\r\n--abcdef--  ",
        );
        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type.unwrap(), "text/plain");
        assert_eq!(file_content.content_disposition, None);

        // Missing CR except in boundaries.
        let file_content = deserialize_file(
            "\r\n--abcdef\ncontent-type: application/json\n\n{}\r\n--abcdef\ncontent-type: text/plain  \n\nsome plain text\r\n--abcdef--",
        );
        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type.unwrap(), "text/plain");
        assert_eq!(file_content.content_disposition, None);

        // No leading CRLF (and no preamble)
        let file_content = deserialize_file(
            "--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--",
        );
        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type, None);
        assert_eq!(file_content.content_disposition, None);

        // Boundary text in preamble, but no leading CRLF, so it should be
        // ignored.
        let file_content = deserialize_file(
            "foo--abcdef\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--",
        );
        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type, None);
        assert_eq!(file_content.content_disposition, None);

        // No body part headers.
        let file_content = deserialize_file(
            "\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--",
        );
        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type, None);
        assert_eq!(file_content.content_disposition, None);

        // Raw UTF-8 filename (some kind of compatibility with multipart/form-data).
        let file_content = deserialize_file(
            "\r\n--abcdef\r\ncontent-type: application/json\r\n\r\n{}\r\n--abcdef\r\ncontent-type: text/plain\r\ncontent-disposition: inline; filename=\"ȵ⌾Ⱦԩ💈Ňɠ\"\r\n\r\nsome plain text\r\n--abcdef--",
        );
        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type.unwrap(), "text/plain");
        let content_disposition = file_content.content_disposition.unwrap();
        assert_eq!(content_disposition.disposition_type, ContentDispositionType::Inline);
        assert_eq!(content_disposition.filename.unwrap(), "ȵ⌾Ⱦԩ💈Ňɠ");
    }
}