~mort/coffeepaste

ref: b9f5ac1ec6061d264a74bd6c8d70c1854d491c05 coffeepaste/src/mime.rs -rw-r--r-- 3.7 KiB
b9f5ac1eMartin Dørum fix error in 400.html 10 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
use bytes::Bytes;

/// Decides whether content should be treated as plain text.
///
/// The extension is consulted first: `"data"` is never plain text and
/// `"auto"` always is. For any other (or missing) extension the decision is
/// made from the MIME type alone; a missing MIME type means "not plain text".
pub fn is_plain_text(ext: Option<&str>, mime: Option<&str>) -> bool {
    // `application/*` types that are considered textual even though they do
    // not live under `text/`.
    // NOTE(review): `application/x-cpio` is usually an archive format —
    // confirm that it is meant to be in this list.
    const TEXTUAL_APPLICATION_TYPES: &[&str] = &[
        "application/javascript",
        "application/ecmascript",
        "application/json",
        "application/json5",
        "application/x-sh",
        "application/x-csh",
        "application/xml",
        "application/x-cpio",
        "application/x-latex",
        "application/x-troff",
        "application/x-httpd-php",
        "application/x-perl",
        "application/x-postscript",
        "application/x-sql",
        "application/x-tcl",
        "application/x-tex",
        "application/x-texinfo",
    ];

    // Hand-maintained classification of MIME types as textual or not.
    fn mime_is_plain_text(mime: &str) -> bool {
        if mime.starts_with("text/") {
            return true;
        }
        if !mime.starts_with("application/") {
            return false;
        }

        // Structured-syntax suffixes and the troff family are matched by
        // pattern; everything else must be listed explicitly.
        mime.ends_with("+json")
            || mime.ends_with("+xml")
            || mime.starts_with("application/x-troff-")
            || TEXTUAL_APPLICATION_TYPES.iter().any(|&known| known == mime)
    }

    match (ext, mime) {
        (Some("data"), _) => false,
        (Some("auto"), _) => true,
        (_, Some(mime)) => mime_is_plain_text(mime),
        (_, None) => false,
    }
}

/// Maps a file extension to a MIME type string.
///
/// A handful of extensions are answered from a local override table; every
/// other extension is delegated to `mime_guess::from_ext`, taking its first
/// raw guess. Returns `None` when neither source yields a type.
pub fn from_ext(ext: &str) -> Option<&'static str> {
    // Extensions whose MIME type is fixed here instead of asking `mime_guess`.
    let local_override = match ext {
        "data" => Some("application/octet-stream"),
        "go" => Some("text/plain"),
        "heif" => Some("image/heif"),
        _ => None,
    };

    local_override.or_else(|| mime_guess::from_ext(ext).first_raw())
}


fn bytes_matches_any(a: &Bytes, bs: &[&[i16]]) -> bool {
    fn matches(a: &Bytes, b: &[i16]) -> bool {
        if a.len() < b.len() {
            return false;
        }

        for i in 0..b.len() {
            if b[i] != -1 && a[i] != (b[i] as u8) {
                return false;
            }
        }

        return true;
    }

    for b in bs.iter() {
        if matches(a, b) {
            return true;
        }
    }

    return false;
}

/// Heuristically decides whether the start of `a` is binary data.
///
/// Two checks are applied to (roughly) the first `max` bytes:
///
/// 1. Any byte in `0x00..=0x06` or `0x0e..=0x1f` marks the data as binary.
/// 2. Otherwise the data is binary if UTF-8 validation fails at an offset
///    below `max`.
fn bytes_are_binary(a: &Bytes, max: usize) -> bool {
    let has_control_byte = a
        .iter()
        .take(max)
        .any(|&byte| byte <= 0x06 || (0x0e..=0x1f).contains(&byte));
    if has_control_byte {
        return true;
    }

    // Validate up to four bytes past `max`; presumably this keeps a
    // multi-byte character that straddles the `max` boundary from being
    // reported as invalid merely because it was cut off. Only errors located
    // before `max` count.
    let window_end = a.len().min(max + 4);
    match std::str::from_utf8(&a[..window_end]) {
        Ok(_) => false,
        Err(err) => err.valid_up_to() < max,
    }
}

/// Guesses a file extension and MIME type from the leading bytes of `content`.
///
/// Returns:
/// - `(Some(ext), Some(mime))` when the content starts with a recognised GIF,
///   JPEG, PNG or HEIF (`heic` brand) signature;
/// - `(Some("data"), None)` when no signature matches and the first 256 bytes
///   look binary according to `bytes_are_binary`;
/// - `(None, None)` otherwise.
///
/// In the signature tables below, `-0x1` is a wildcard matching any byte.
pub fn ext_from_file_content(content: &Bytes) -> (Option<&'static str>, Option<&'static str>) {
    if bytes_matches_any(content, &[
            // "GIF87a" / "GIF89a"
            &[0x47, 0x49, 0x46, 0x38, 0x37, 0x61],
            &[0x47, 0x49, 0x46, 0x38, 0x39, 0x61]]) {
        return (Some("gif"), Some("image/gif"));
    } else if bytes_matches_any(content, &[
            &[0xFF, 0xD8, 0xFF, 0xDB],
            // JFIF header
            &[0xFF, 0xD8, 0xFF, 0xE0, 0x00, 0x10, 0x4A, 0x46, 0x49, 0x46, 0x00, 0x01],
            &[0xFF, 0xD8, 0xFF, 0xEE],
            // Exif header; the two wildcard bytes are the segment length
            &[0xFF, 0xD8, 0xFF, 0xE1, -0x1, -0x1, 0x45, 0x78, 0x69, 0x66, 0x00, 0x00]]) {
        return (Some("jpg"), Some("image/jpeg"));
    } else if bytes_matches_any(content, &[
            &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]]) {
        return (Some("png"), Some("image/png"));
    } else if bytes_matches_any(content, &[
            // "ftypheic" preceded by the big-endian size of the `ftyp` box.
            // That size depends on how many compatible brands the file
            // lists, so its low byte is a wildcard rather than the single
            // value 0x20.
            &[0x00, 0x00, 0x00, -0x1, 0x66, 0x74, 0x79, 0x70, 0x68, 0x65, 0x69, 0x63]]) {
        return (Some("heif"), Some("image/heif"));
    }

    // If the content looks binary (control bytes or invalid UTF-8 near the
    // start), return "data", representing unknown binary data.
    if bytes_are_binary(content, 256) {
        return (Some("data"), None);
    }

    (None, None)
}