~nickbp/originz

ref: fbaed2a25114cf06aaa5d509c0d19d28ac4faa6d originz/src/filter/reader.rs -rw-r--r-- 11.6 KiB
fbaed2a2Nick Parker Implement benchmark test for UDP client/UDP upstream (#10) 1 year, 10 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
#![deny(warnings, rust_2018_idioms)]

use anyhow::{bail, Context, Result};
use std::collections::HashMap;
use std::fmt::Display;
use std::fs::File;
use std::io::{BufRead, BufReader, Read};
use std::net::{Ipv4Addr, Ipv6Addr};
use std::path::Path;
use std::str::FromStr;

use tracing::warn;

use crate::filter::path;

/// How the entries of a filter file are interpreted while the file is being parsed.
#[derive(Debug)]
pub enum FilterType {
    /// Hosts that should be redirected (any IP) OR let through to upstream (no IP).
    /// This type is also compatible with any '/etc/hosts' file.
    /// Destination IPs listed in the file are stored as-is.
    OVERRIDE,

    /// Hosts that should be blackholed.
    /// It is invalid for any entry to have a non-local IP - avoids domain hijacking by block providers.
    /// Any (local) IP that is listed is treated as a stub and is not stored with the entry.
    BLOCK,
}

/// Describes a single filter file and how its content should be interpreted.
/// The `Debug` output of this struct is embedded in parse/read error messages.
#[derive(Debug)]
pub struct FileInfo {
    /// Origin of the file.
    /// NOTE(review): presumably the configured path or URL that the local copy was obtained from - confirm against callers.
    pub source_path: String,
    /// Filesystem path that is opened when reading the entries.
    pub local_path: String,
    /// Whether the entries are treated as overrides or as blocks during parsing.
    pub filter_type: FilterType,
}

/// If this entry is found for a host, then:
/// - ipv4 AND ipv6    BOTH None:   return NXDOMAIN for all record types (only if BOTH are None)
/// - ipv4 and/or ipv6 EITHER Some: return the respective IP for A/AAAA, or "record not found".
///   For all other record types return "record not found".
///
/// Note that it's a little ambiguous whether we should instead try going upstream when querying misc
/// record types for hosts listed in filters. But in that case, would you be putting custom host entries
/// locally if you had a DNS server with the right information available upstream?
/// Therefore we explicitly do NOT support misc record types like MX and SRV for hostnames that have a filter entry.
#[derive(Debug)]
pub struct FileEntry {
    /// Line in the filter file of the most recent entry for this host (1-based).
    pub line_num: usize,
    /// IPv4 destination for the host, if one was provided and kept.
    pub dest_ipv4: Option<Ipv4Addr>,
    /// IPv6 destination for the host, if one was provided and kept.
    pub dest_ipv6: Option<Ipv6Addr>,
}

/// The parsed content of a single filter file: the file's description plus a
/// hostname-to-entry lookup table.
#[derive(Debug)]
pub struct FileEntries {
    /// Description of the file that the entries were read from.
    pub info: FileInfo,
    /// Parsed entries keyed by hostname, spelled exactly as in the file.
    entries: HashMap<String, FileEntry>,
}

impl FileEntries {
    fn new(info: FileInfo) -> FileEntries {
        FileEntries {
            info,
            entries: HashMap::new(),
        }
    }

    fn add_ipv4(self: &mut FileEntries, line_num: usize, host: String, dest: Option<Ipv4Addr>) {
        // If the entry is already present, keep it and just update the ipv4 value.
        // This is mainly for the case of both ipv4+ipv6 entries in the same file.
        let initial_val = FileEntry {
            line_num,
            dest_ipv4: dest,
            dest_ipv6: None,
        };
        let map_val = self.entries.entry(host).or_insert(initial_val);
        // Update line_num: just go with whatever line is last, since that's what we're using for the dest.
        // This may be inaccurate if the file has both IPv4 and IPv6 entries for a given hostname on different
        // lines in the same file, for example with "localhost" in /etc/hosts. But this is a big corner case
        // and isn't worth the complexity to deal with multiple per-protocol line numbers.
        map_val.line_num = line_num;
        map_val.dest_ipv4 = dest;
    }

    fn add_ipv6(self: &mut FileEntries, line_num: usize, host: String, dest: Option<Ipv6Addr>) {
        // If the entry is already present, keep it and just update the ipv6 value.
        // This is mainly for the case of both ipv4+ipv6 entries in the same file.
        let initial_val = FileEntry {
            line_num,
            dest_ipv4: None,
            dest_ipv6: dest,
        };
        let map_val = self.entries.entry(host).or_insert(initial_val);
        // Update line_num: just go with whatever line is last, since that's what we're using for the dest.
        // This may be inaccurate if the file has both IPv4 and IPv6 entries for a given hostname on different
        // lines in the same file, for example with "localhost" in /etc/hosts. But this is a big corner case
        // and isn't worth the complexity to deal with multiple per-protocol line numbers.
        map_val.line_num = line_num;
        map_val.dest_ipv6 = dest;
    }

    pub fn get(self: &FileEntries, host: &String) -> Option<&FileEntry> {
        self.entries.get(host)
    }

    pub fn is_empty(self: &FileEntries) -> bool {
        self.entries.is_empty()
    }

    pub fn info(self: &FileEntries) -> &FileInfo {
        &self.info
    }
}

/// Reads an override or block file from disk, streaming the list of entries
pub fn read(info: FileInfo) -> Result<FileEntries> {
    let path_str = info.local_path.clone();
    let path = Path::new(&path_str);
    let file = File::open(path).with_context(|| format!("Failed to open file {:?}", info))?;
    if path::is_zstd_extension(path) {
        read_imp(info, zstd::stream::Decoder::new(file)?)
    } else {
        read_imp(info, file)
    }
}

/// Parses filter entries line by line from `file`, which is wrapped in a buffered reader.
///
/// Lines that fail to parse are logged as warnings and skipped, while a failure to read from
/// `file` aborts the whole operation with an error. Line numbers passed along are 1-based.
fn read_imp<T: Read>(info: FileInfo, file: T) -> Result<FileEntries> {
    let mut entries = FileEntries::new(info);
    let mut lines = BufReader::new(file);
    // A single line buffer is reused across iterations to avoid per-line allocations.
    let mut line = String::new();
    let mut line_num = 0;
    loop {
        line_num += 1;
        line.clear();
        let bytes_read = lines
            .read_line(&mut line)
            .with_context(|| format!("Failed to read file {:?}", entries.info))?;
        if bytes_read == 0 {
            // EOF
            break;
        }
        if let Err(e) = handle_line(&line, line_num, &mut entries) {
            warn!(
                "Failed to parse {:?} line {}: {}",
                entries.info, line_num, e
            );
        }
    }
    Ok(entries)
}

/// Parses a single line of a filter file and records any host entries in `out`.
///
/// Anything after a `#` is treated as a comment and ignored. Two formats are supported:
/// - Hosts via override: `<ip> <host1> [host2] [...]`
/// - Block or override:  `<host>`
///
/// Blank lines (possibly after stripping a comment) are accepted and ignored.
///
/// # Errors
/// Returns an error if the IP fails to parse, if the IP is not acceptable for the file's
/// filter type, or if a hostname is invalid. Hosts that appear on the line before the
/// offending one have already been recorded at that point.
fn handle_line(line: &str, line_num: usize, out: &mut FileEntries) -> Result<()> {
    // Cut out any comment from the line before tokenizing words
    let content = match line.find('#') {
        Some(comment_start) => &line[..comment_start],
        None => line,
    };
    let mut words = content.split_ascii_whitespace();

    // First word: either an IP for hosts-style override, or a hostname for block-style
    let first = match words.next() {
        Some(first) => first,
        // Blank line (possibly after stripping any comments)
        None => return Ok(()),
    };

    // Second word: a hostname for hosts-style (and more hostnames may follow)
    let second = match words.next() {
        Some(second) => second,
        // No second word: blocklist style, no destination
        None => {
            out.add_ipv4(line_num, validate_host(first, &out.info, line_num)?, None);
            return Ok(());
        }
    };

    // All host mappings on the line: the second word plus anything that follows it.
    let hosts = std::iter::once(second).chain(words);

    // For block files, the unspecified address (0.0.0.0 or ::) is accepted in addition to
    // loopback addresses: hosts-format blocklists commonly use it as the blackhole target.
    // This is safe because the destination listed in a block file is discarded anyway.
    if first.contains(':') {
        let ipv6_dest = Ipv6Addr::from_str(first).with_context(|| {
            format!(
                "Failed to parse IPv6 address in {:?} line {}: {}",
                out.info, line_num, first
            )
        })?;
        let out_dest = validate_filter(
            ipv6_dest,
            ipv6_dest.is_loopback() || ipv6_dest.is_unspecified(),
            &out.info,
            line_num,
        )?;
        for host in hosts {
            out.add_ipv6(
                line_num,
                validate_host(host, &out.info, line_num)?,
                out_dest,
            );
        }
    } else {
        let ipv4_dest = Ipv4Addr::from_str(first).with_context(|| {
            format!(
                "Failed to parse IPv4 address in {:?} line {}: {}",
                out.info, line_num, first
            )
        })?;
        let out_dest = validate_filter(
            ipv4_dest,
            ipv4_dest.is_loopback() || ipv4_dest.is_unspecified(),
            &out.info,
            line_num,
        )?;
        for host in hosts {
            out.add_ipv4(
                line_num,
                validate_host(host, &out.info, line_num)?,
                out_dest,
            );
        }
    }
    Ok(())
}

/// Decides which destination, if any, gets stored for an IP listed in a filter file.
///
/// - `ip_dest`: the destination IP parsed from the file.
/// - `is_loopback`: whether the caller considers `ip_dest` to be a local address.
/// - `info`, `line_num`: where the IP was found, used for the filter type and error reporting.
///
/// Returns `Some(ip_dest)` for override files and `None` for block files.
/// Fails if a block file lists an IP that is not local.
fn validate_filter<T: Display>(
    ip_dest: T,
    is_loopback: bool,
    info: &FileInfo,
    line_num: usize,
) -> Result<Option<T>> {
    match (&info.filter_type, is_loopback) {
        // Declared as an override: no locality requirement, and the requested destination is
        // passed along as-is.
        (FilterType::OVERRIDE, _) => Ok(Some(ip_dest)),

        // Block entries MUST either mention NO destination, or the destination should be
        // something like 127.0.0.1. The (local) IP is only a stub value, so it is omitted.
        (FilterType::BLOCK, true) => Ok(None),

        // A supposed "block" that points somewhere else could be used maliciously as a sort of
        // DNS hijacking, so it is refused outright.
        (FilterType::BLOCK, false) => {
            bail!("POSSIBLE DNS HIJACK ATTEMPT BY BLOCK SOURCE: Invalid non-local 'block' IP in {:?} line {}: {}", info, line_num, ip_dest);
        }
    }
}

/// Checks that `host` looks like a valid hostname and returns an owned copy of it.
///
/// From "man 5 hosts":
/// Host names may contain only alphanumeric characters, minus signs ("-"), and periods (".").
/// They must begin with an alphabetic character and end with an alphanumeric character.
/// HOWEVER, in reality both of these seem to be valid:
/// - hostnames that start with a number (in the subdomain)
/// - hostnames that contain '_'
///
/// The rules enforced here are therefore:
/// - the host must be at least two bytes long,
/// - every char except the last must be ASCII alphanumeric, '-', '_', or '.',
/// - the last char must be ASCII alphanumeric.
///
/// `file_info` and `line_num` are only used for error reporting.
///
/// # Errors
/// Returns an error describing the violated rule if `host` is invalid.
fn validate_host(host: &str, file_info: &FileInfo, line_num: usize) -> Result<String> {
    if host.len() < 2 {
        bail!(
            "Invalid host of length {} in {:?} line {}: {}",
            host.len(),
            file_info,
            line_num,
            host
        );
    }

    // Split off the last char, leaving the first and middle char(s) in the iterator.
    // The length check above guarantees that there is at least one char.
    let mut leading = host.chars();
    let last = leading.next_back();

    // First and middle char(s)
    if leading.any(|c| !(c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.')) {
        bail!(
            "Invalid host in {:?} line {}: First and middle chars must be alphanumeric, '-', '_', or '.': {}",
            file_info, line_num, host
        );
    }

    // Last char
    if !last.map_or(false, |c| c.is_ascii_alphanumeric()) {
        bail!(
            "Invalid host in {:?} line {}: Last char must be alphanumeric: {}",
            file_info,
            line_num,
            host
        );
    }

    Ok(host.to_string())
}