ruma_federation_api/
authenticated_media.rs

1//! Authenticated endpoints for the content repository, according to [MSC3916].
2//!
3//! [MSC3916]: https://github.com/matrix-org/matrix-spec-proposals/pull/3916
4
5use ruma_common::http_headers::ContentDisposition;
6use serde::{Deserialize, Serialize};
7
8pub mod get_content;
9pub mod get_content_thumbnail;
10
/// The `multipart/mixed` mime "essence".
///
/// Used to build the `Content-Type` header when serializing a response, and compared
/// (ASCII case-insensitively) against the essence of the `Content-Type` header when
/// deserializing one.
const MULTIPART_MIXED: &str = "multipart/mixed";
/// The maximum number of headers to parse in a body part.
///
/// This is the size of the fixed array of headers handed to `httparse` for the content part.
const MAX_HEADERS_COUNT: usize = 32;
/// The length of the generated boundary.
///
/// This is the number of random alphanumeric characters drawn when serializing a response.
const GENERATED_BOUNDARY_LENGTH: usize = 30;
17
/// The metadata of a file from the content repository.
///
/// This type currently has no fields. It is written as JSON in the first part of the
/// `multipart/mixed` body when serializing, and read back from that part when deserializing.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[cfg_attr(not(ruma_unstable_exhaustive_types), non_exhaustive)]
pub struct ContentMetadata {}
22
23impl ContentMetadata {
24    /// Creates a new empty `ContentMetadata`.
25    pub fn new() -> Self {
26        Self {}
27    }
28}
29
/// A file from the content repository or the location where it can be found.
///
/// When deserializing a `multipart/mixed` body, a `Location` header in the second part selects
/// the `Location` variant; without that header the part is read as a `File`.
#[derive(Debug, Clone)]
#[cfg_attr(not(ruma_unstable_exhaustive_types), non_exhaustive)]
pub enum FileOrLocation {
    /// The content of the file.
    File(Content),

    /// The file is at the given URL.
    ///
    /// The string is the raw value of the `Location` header of the body part.
    Location(String),
}
40
/// The content of a file from the content repository.
#[derive(Debug, Clone)]
#[cfg_attr(not(ruma_unstable_exhaustive_types), non_exhaustive)]
pub struct Content {
    /// The content of the file as bytes.
    pub file: Vec<u8>,

    /// The content type of the file that was previously uploaded.
    ///
    /// When this is `None`, serialization into a `multipart/mixed` body falls back to
    /// `application/octet-stream`.
    pub content_type: Option<String>,

    /// The value of the `Content-Disposition` HTTP header, possibly containing the name of the
    /// file that was previously uploaded.
    ///
    /// When this is `None`, no `Content-Disposition` header is written for the body part.
    pub content_disposition: Option<ContentDisposition>,
}
55
56impl Content {
57    /// Creates a new `Content` with the given bytes.
58    pub fn new(
59        file: Vec<u8>,
60        content_type: String,
61        content_disposition: ContentDisposition,
62    ) -> Self {
63        Self {
64            file,
65            content_type: Some(content_type),
66            content_disposition: Some(content_disposition),
67        }
68    }
69}
70
/// Serialize the given metadata and content into a `http::Response` with a `multipart/mixed`
/// body.
///
/// The body is made of two parts separated by a randomly generated alphanumeric boundary:
///
/// 1. the metadata, serialized as JSON with an `application/json` content type;
/// 2. either the file bytes preceded by their `Content-Type` (and `Content-Disposition`, if any)
///    headers, or a part without content that only carries a `Location` header.
///
/// The `Content-Type` header of the returned response advertises the boundary that was used.
///
/// Returns an error if the metadata cannot be serialized or if the response cannot be built.
#[cfg(feature = "server")]
fn try_into_multipart_mixed_response<T: Default + bytes::BufMut>(
    metadata: &ContentMetadata,
    content: &FileOrLocation,
) -> Result<http::Response<T>, ruma_common::api::error::IntoHttpError> {
    use std::io::Write as _;

    use rand::Rng as _;

    let boundary: String = rand::thread_rng()
        .sample_iter(&rand::distributions::Alphanumeric)
        .take(GENERATED_BOUNDARY_LENGTH)
        .map(char::from)
        .collect();

    let mut out = T::default().writer();

    // First part: opening delimiter, JSON content type, empty line, then the metadata itself.
    let _ = write!(
        out,
        "\r\n--{boundary}\r\n{}: {}\r\n\r\n",
        http::header::CONTENT_TYPE,
        mime::APPLICATION_JSON
    );
    serde_json::to_writer(&mut out, metadata)?;

    // Delimiter between the metadata part and the content part.
    let _ = write!(out, "\r\n--{boundary}\r\n");

    // Second part: either the file with its headers, or only a location header.
    match content {
        FileOrLocation::Location(location) => {
            // No content follows, only the header and the empty line that closes the headers.
            let _ = write!(out, "{}: {location}\r\n\r\n", http::header::LOCATION);
        }
        FileOrLocation::File(file_content) => {
            // A missing content type is advertised as a generic stream of bytes.
            let content_type = file_content
                .content_type
                .as_deref()
                .unwrap_or(mime::APPLICATION_OCTET_STREAM.as_ref());
            let _ = write!(out, "{}: {content_type}\r\n", http::header::CONTENT_TYPE);

            if let Some(content_disposition) = &file_content.content_disposition {
                let _ = write!(
                    out,
                    "{}: {content_disposition}\r\n",
                    http::header::CONTENT_DISPOSITION
                );
            }

            // The empty line closes the headers, the raw bytes of the file follow.
            let _ = out.write_all(b"\r\n");
            let _ = out.write_all(&file_content.file);
        }
    }

    // Closing delimiter.
    let _ = write!(out, "\r\n--{boundary}--");

    let body = out.into_inner();
    let content_type = format!("{MULTIPART_MIXED}; boundary={boundary}");
    let response =
        http::Response::builder().header(http::header::CONTENT_TYPE, content_type).body(body)?;

    Ok(response)
}
141
/// Deserialize the given metadata and content from a `http::Response` with a `multipart/mixed`
/// body.
///
/// The boundary is read from the `Content-Type` header of the response. The first body part is
/// deserialized as the JSON metadata, without looking at its headers. The second body part is
/// either a file, with optional `Content-Type` and `Content-Disposition` headers, or a
/// `Location` header pointing to where the file can be found.
///
/// Returns an error if the `Content-Type` header is missing, is not `multipart/mixed` or has no
/// boundary, if fewer than two body parts delimited by the boundary are found, or if a part or
/// its headers cannot be parsed.
#[cfg(feature = "client")]
fn try_from_multipart_mixed_response<T: AsRef<[u8]>>(
    http_response: http::Response<T>,
) -> Result<
    (ContentMetadata, FileOrLocation),
    ruma_common::api::error::FromHttpResponseError<ruma_common::api::error::MatrixError>,
> {
    use ruma_common::api::error::{HeaderDeserializationError, MultipartMixedDeserializationError};

    // The boundary is a parameter of the content type of the response.
    let content_type_header = http_response
        .headers()
        .get(http::header::CONTENT_TYPE)
        .ok_or_else(|| HeaderDeserializationError::MissingHeader("Content-Type".to_owned()))?;
    let response_mime = content_type_header
        .to_str()?
        .parse::<mime::Mime>()
        .map_err(|e| HeaderDeserializationError::InvalidHeader(e.into()))?;

    let essence = response_mime.essence_str();
    if !essence.eq_ignore_ascii_case(MULTIPART_MIXED) {
        return Err(HeaderDeserializationError::InvalidHeaderValue {
            header: "Content-Type".to_owned(),
            expected: MULTIPART_MIXED.to_owned(),
            unexpected: essence.to_owned(),
        }
        .into());
    }

    let boundary = response_mime
        .get_param("boundary")
        .ok_or(HeaderDeserializationError::MissingMultipartBoundary)?
        .as_str()
        .as_bytes();

    let body = http_response.body().as_ref();

    // A delimiter is a CRLF followed by two dashes and the boundary.
    let delimiter = [&b"\r\n--"[..], boundary].concat();
    let delimiter_no_crlf = &delimiter[2..];

    let mut delimiter_positions = memchr::memmem::find_iter(body, &delimiter);

    // Locate the metadata part, between the first and the second delimiter.
    let metadata_start = if body.starts_with(delimiter_no_crlf) {
        // Without a preamble, the CRLF preceding the first delimiter may be omitted.
        delimiter_no_crlf.len()
    } else {
        let first_delimiter = delimiter_positions
            .next()
            .ok_or(MultipartMixedDeserializationError::MissingBodyParts { expected: 2, found: 0 })?;

        first_delimiter + delimiter.len()
    };
    let metadata_end = delimiter_positions
        .next()
        .ok_or(MultipartMixedDeserializationError::MissingBodyParts { expected: 2, found: 0 })?;

    // The headers of the metadata part are ignored, its content is expected to be JSON.
    let (_, metadata_json) = parse_multipart_body_part(body, metadata_start, metadata_end)?;
    let metadata: ContentMetadata = serde_json::from_slice(metadata_json)?;

    // Locate the media part, between the second and the third delimiter.
    let content_start = metadata_end + delimiter.len();
    let content_end = delimiter_positions
        .next()
        .ok_or(MultipartMixedDeserializationError::MissingBodyParts { expected: 2, found: 1 })?;

    let (raw_headers, file) = parse_multipart_body_part(body, content_start, content_end)?;

    let mut parsed_headers = [httparse::EMPTY_HEADER; MAX_HEADERS_COUNT];
    httparse::parse_headers(raw_headers, &mut parsed_headers)
        .map_err(|e| MultipartMixedDeserializationError::InvalidHeader(e.into()))?;

    // Convert the raw value of a header into an owned string.
    let header_value_to_string = |value: &[u8]| {
        String::from_utf8(value.to_vec())
            .map_err(|e| MultipartMixedDeserializationError::InvalidHeader(e.into()))
    };

    let mut location = None;
    let mut content_type = None;
    let mut content_disposition = None;

    // Slots that were not filled by the parser keep an empty name, so the first one marks the end
    // of the parsed headers.
    for header in parsed_headers.iter().take_while(|header| !header.name.is_empty()) {
        let name = header.name;

        if name == http::header::LOCATION {
            location = Some(header_value_to_string(header.value)?);

            // A location makes the other headers irrelevant, no need to go further.
            break;
        }

        if name == http::header::CONTENT_TYPE {
            content_type = Some(header_value_to_string(header.value)?);
        } else if name == http::header::CONTENT_DISPOSITION {
            content_disposition = Some(
                ContentDisposition::try_from(header.value)
                    .map_err(|e| MultipartMixedDeserializationError::InvalidHeader(e.into()))?,
            );
        }
    }

    let content = match location {
        Some(location) => FileOrLocation::Location(location),
        None => FileOrLocation::File(Content {
            file: file.to_owned(),
            content_type,
            content_disposition,
        }),
    };

    Ok((metadata, content))
}
258
/// Parse the multipart body part in the given bytes, starting and ending at the given positions.
///
/// `start` is expected to be the position right after a boundary delimiter and `end` the position
/// where the next delimiter begins, so `bytes[start..end]` contains the remainder of the boundary
/// line, the headers of the part, an empty line, and the content of the part.
///
/// Returns a `(headers_bytes, content_bytes)` tuple. The headers do not include the empty line
/// that separates them from the content.
///
/// # Errors
///
/// Returns `MissingBodyPartInnerSeparator` if the boundary line is not terminated by a newline or
/// if the empty line separating the headers from the content could not be found.
///
/// # Panics
///
/// Panics if `start..end` is not a valid range in `bytes`.
#[cfg(feature = "client")]
fn parse_multipart_body_part(
    bytes: &[u8],
    start: usize,
    end: usize,
) -> Result<(&[u8], &[u8]), ruma_common::api::error::MultipartMixedDeserializationError> {
    use ruma_common::api::error::MultipartMixedDeserializationError;

    // The part should start with a newline after the boundary. We need to ignore characters before
    // it in case of extra whitespaces, and for compatibility it might not have a CR.
    //
    // The searched range stops before the next delimiter, so a part received from a remote server
    // might not contain any newline at all (e.g. two delimiters directly following each other).
    // This must be reported as an error rather than a panic.
    let headers_start = memchr::memchr(b'\n', &bytes[start..end])
        .ok_or(MultipartMixedDeserializationError::MissingBodyPartInnerSeparator)?
        + start
        + 1;

    // Let's find an empty line now.
    let mut line_start = headers_start;
    let mut line_end;

    loop {
        line_end = memchr::memchr(b'\n', &bytes[line_start..end])
            .ok_or(MultipartMixedDeserializationError::MissingBodyPartInnerSeparator)?
            + line_start
            + 1;

        // For compatibility, the empty line is accepted with or without a CR.
        if matches!(&bytes[line_start..line_end], b"\r\n" | b"\n") {
            break;
        }

        line_start = line_end;
    }

    // The headers end where the empty line starts, the content starts right after it.
    Ok((&bytes[headers_start..line_start], &bytes[line_end..end]))
}
297
/// Tests for the conversions to and from `multipart/mixed` responses.
#[cfg(all(test, feature = "client", feature = "server"))]
mod tests {
    use assert_matches2::assert_matches;
    use ruma_common::http_headers::{ContentDisposition, ContentDispositionType};

    use super::{
        try_from_multipart_mixed_response, try_into_multipart_mixed_response, Content,
        ContentMetadata, FileOrLocation,
    };

    /// The `Content-Type` header matching the boundary used in the handwritten bodies.
    const ABCDEF_CONTENT_TYPE: &str = "multipart/mixed; boundary=abcdef";

    /// Build a response with the given `Content-Type` header value and body.
    fn response_with_content_type<'a>(
        content_type: &str,
        body: &'a str,
    ) -> http::Response<&'a str> {
        http::Response::builder().header(http::header::CONTENT_TYPE, content_type).body(body).unwrap()
    }

    /// Serialize the given content with empty metadata, and deserialize the resulting response.
    fn roundtrip(outgoing_content: &FileOrLocation) -> FileOrLocation {
        let outgoing_metadata = ContentMetadata::new();

        let response =
            try_into_multipart_mixed_response::<Vec<u8>>(&outgoing_metadata, outgoing_content)
                .unwrap();

        let (_incoming_metadata, incoming_content) =
            try_from_multipart_mixed_response(response).unwrap();

        incoming_content
    }

    /// Deserialize the given body using the `abcdef` boundary, expecting it to contain a file.
    fn deserialize_file(body: &str) -> Content {
        let response = response_with_content_type(ABCDEF_CONTENT_TYPE, body);
        let (_metadata, content) = try_from_multipart_mixed_response(response).unwrap();

        assert_matches!(content, FileOrLocation::File(file_content));
        file_content
    }

    /// Assert that deserializing a response with the given `Content-Type` and body fails.
    fn assert_deserialization_fails(content_type: &str, body: &str) {
        let response = response_with_content_type(content_type, body);
        try_from_multipart_mixed_response(response).unwrap_err();
    }

    #[test]
    fn multipart_mixed_content_ascii_filename_conversions() {
        let file = "s⌽me UTF-8 Ťext".as_bytes();
        let content_type = "text/plain";
        let content_disposition = ContentDisposition::new(ContentDispositionType::Attachment)
            .with_filename(Some("filename.txt".to_owned()));

        let outgoing_content = FileOrLocation::File(Content {
            file: file.to_vec(),
            content_type: Some(content_type.to_owned()),
            content_disposition: Some(content_disposition.clone()),
        });

        let incoming_content = roundtrip(&outgoing_content);

        assert_matches!(incoming_content, FileOrLocation::File(incoming_content));
        assert_eq!(incoming_content.file, file);
        assert_eq!(incoming_content.content_type.unwrap(), content_type);
        assert_eq!(incoming_content.content_disposition, Some(content_disposition));
    }

    #[test]
    fn multipart_mixed_content_utf8_filename_conversions() {
        let file = "s⌽me UTF-8 Ťext".as_bytes();
        let content_type = "text/plain";
        let content_disposition = ContentDisposition::new(ContentDispositionType::Attachment)
            .with_filename(Some("fȈlƩnąmǝ.txt".to_owned()));

        let outgoing_content = FileOrLocation::File(Content {
            file: file.to_vec(),
            content_type: Some(content_type.to_owned()),
            content_disposition: Some(content_disposition.clone()),
        });

        let incoming_content = roundtrip(&outgoing_content);

        assert_matches!(incoming_content, FileOrLocation::File(incoming_content));
        assert_eq!(incoming_content.file, file);
        assert_eq!(incoming_content.content_type.unwrap(), content_type);
        assert_eq!(incoming_content.content_disposition, Some(content_disposition));
    }

    #[test]
    fn multipart_mixed_location_conversions() {
        let location = "https://server.local/media/filename.txt";

        let outgoing_content = FileOrLocation::Location(location.to_owned());

        let incoming_content = roundtrip(&outgoing_content);

        assert_matches!(incoming_content, FileOrLocation::Location(incoming_location));
        assert_eq!(incoming_location, location);
    }

    #[test]
    fn multipart_mixed_deserialize_invalid() {
        // Missing boundary in headers.
        assert_deserialization_fails(
            "multipart/mixed",
            "\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\nContent-Type: text/plain\r\n\r\nsome plain text\r\n--abcdef--",
        );

        // Wrong boundary.
        assert_deserialization_fails(
            "multipart/mixed; boundary=012345",
            "\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\nContent-Type: text/plain\r\n\r\nsome plain text\r\n--abcdef--",
        );

        // Missing boundary in body.
        assert_deserialization_fails(
            ABCDEF_CONTENT_TYPE,
            "\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\nContent-Type: text/plain\r\n\r\nsome plain text",
        );

        // Missing header and content empty line separator in body part.
        assert_deserialization_fails(
            ABCDEF_CONTENT_TYPE,
            "\r\n--abcdef\r\n{}\r\n--abcdef\r\nContent-Type: text/plain\r\n\r\nsome plain text\r\n--abcdef--",
        );

        // Control character in header. The empty line between the headers and the content is
        // present, so that the failure comes from the header itself rather than from a missing
        // separator.
        assert_deserialization_fails(
            ABCDEF_CONTENT_TYPE,
            "\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\nContent-Type: text/plain\r\nContent-Disposition: inline; filename=\"my\nfile\"\r\n\r\nsome plain text\r\n--abcdef--",
        );

        // Boundary without CRLF with preamble.
        assert_deserialization_fails(
            ABCDEF_CONTENT_TYPE,
            "foo--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--",
        );
    }

    #[test]
    fn multipart_mixed_deserialize_valid() {
        // Simple.
        let file_content = deserialize_file(
            "\r\n--abcdef\r\ncontent-type: application/json\r\n\r\n{}\r\n--abcdef\r\ncontent-type: text/plain\r\n\r\nsome plain text\r\n--abcdef--",
        );

        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type.unwrap(), "text/plain");
        assert_eq!(file_content.content_disposition, None);

        // Case-insensitive headers.
        let file_content = deserialize_file(
            "\r\n--abcdef\r\nCONTENT-type: application/json\r\n\r\n{}\r\n--abcdef\r\nCONTENT-TYPE: text/plain\r\ncoNtenT-disPosItioN: attachment; filename=my_file.txt\r\n\r\nsome plain text\r\n--abcdef--",
        );

        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type.unwrap(), "text/plain");
        let content_disposition = file_content.content_disposition.unwrap();
        assert_eq!(content_disposition.disposition_type, ContentDispositionType::Attachment);
        assert_eq!(content_disposition.filename.unwrap(), "my_file.txt");

        // Extra whitespace.
        let file_content = deserialize_file(
            "   \r\n--abcdef\r\ncontent-type:   application/json   \r\n\r\n {} \r\n--abcdef\r\ncontent-type: text/plain  \r\n\r\nsome plain text\r\n--abcdef--  ",
        );

        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type.unwrap(), "text/plain");
        assert_eq!(file_content.content_disposition, None);

        // Missing CR except in boundaries.
        let file_content = deserialize_file(
            "\r\n--abcdef\ncontent-type: application/json\n\n{}\r\n--abcdef\ncontent-type: text/plain  \n\nsome plain text\r\n--abcdef--",
        );

        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type.unwrap(), "text/plain");
        assert_eq!(file_content.content_disposition, None);

        // No leading CRLF (and no preamble)
        let file_content = deserialize_file(
            "--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--",
        );

        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type, None);
        assert_eq!(file_content.content_disposition, None);

        // Boundary text in preamble, but no leading CRLF, so it should be
        // ignored.
        let file_content = deserialize_file(
            "foo--abcdef\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--",
        );

        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type, None);
        assert_eq!(file_content.content_disposition, None);

        // No body part headers.
        let file_content = deserialize_file(
            "\r\n--abcdef\r\n\r\n{}\r\n--abcdef\r\n\r\nsome plain text\r\n--abcdef--",
        );

        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type, None);
        assert_eq!(file_content.content_disposition, None);

        // Raw UTF-8 filename (some kind of compatibility with multipart/form-data).
        let file_content = deserialize_file(
            "\r\n--abcdef\r\ncontent-type: application/json\r\n\r\n{}\r\n--abcdef\r\ncontent-type: text/plain\r\ncontent-disposition: inline; filename=\"ȵ⌾Ⱦԩ💈Ňɠ\"\r\n\r\nsome plain text\r\n--abcdef--",
        );

        assert_eq!(file_content.file, b"some plain text");
        assert_eq!(file_content.content_type.unwrap(), "text/plain");
        let content_disposition = file_content.content_disposition.unwrap();
        assert_eq!(content_disposition.disposition_type, ContentDispositionType::Inline);
        assert_eq!(content_disposition.filename.unwrap(), "ȵ⌾Ⱦԩ💈Ňɠ");
    }
}