~nickbp/kapiti

f9aaeafba89e0ef441ff0998edb56e4e66858a28 — Nick Parker 7 months ago 3d6e5d9
Support hardcoded block for firefox canary domain (#31)
4 files changed, 127 insertions(+), 81 deletions(-)

M src/filter/filter.rs
M src/filter/reader.rs
M src/lookup.rs
M src/runner.rs
M src/filter/filter.rs => src/filter/filter.rs +40 -27
@@ 55,8 55,8 @@ impl<'a> Iterator for DomainParentIter<'a> {
}

pub struct Filter {
    overrides: Vec<reader::FileEntries>,
    blocks: Vec<reader::FileEntries>,
    overrides: Vec<reader::FilterEntries>,
    blocks: Vec<reader::FilterEntries>,
    filters_dir: PathBuf,
    fetch_client: Client<hyper_smol::SmolConnector, Body>,
}


@@ 88,11 88,13 @@ impl Filter {
    }

    pub fn update_override(self: &mut Filter, override_path: &String) -> Result<()> {
        let file_entries = reader::read(reader::FileInfo {
            source_path: override_path.clone(),
            local_path: override_path.clone(),
            filter_type: reader::FilterType::OVERRIDE,
        })?;
        let file_entries = reader::read(
            reader::FilterType::OVERRIDE,
            reader::FileInfo {
                source_path: override_path.clone(),
                local_path: override_path.clone()
            }
        )?;

        // Before adding new entry, check for existing entry to be replaced/updated.
        upsert_entries(&mut self.overrides, file_entries);


@@ 112,12 114,13 @@ impl Filter {
            timeout_ms,
        )
        .await?;
        let info = reader::FileInfo {
            source_path: hosts_entry.clone(),
            local_path: download_path_str,
            filter_type: reader::FilterType::BLOCK,
        };
        let file_entries = reader::read(info)?;
        let file_entries = reader::read(
            reader::FilterType::BLOCK,
            reader::FileInfo {
                source_path: hosts_entry.clone(),
                local_path: download_path_str,
            }
        )?;
        if !file_entries.is_empty() {
            // Note: In theory we could dedupe entries across different blocks to save memory.
            // However this causes problems if we want to granularly update individual files.


@@ 132,7 135,13 @@ impl Filter {
        Ok(())
    }

    pub fn check(self: &Filter, host: &String) -> Option<(&reader::FileInfo, &reader::FileEntry)> {
    /// Installs a block list built from the given hardcoded hostnames.
    ///
    /// The entry set is produced by `reader::block_hardcoded` and then handed to
    /// `upsert_entries` together with `self.blocks`.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `reader::block_hardcoded`.
    pub fn set_hardcoded_block(self: &mut Filter, block_names: &[&str]) -> Result<()> {
        upsert_entries(&mut self.blocks, reader::block_hardcoded(block_names)?);
        Ok(())
    }

    pub fn check(self: &Filter, host: &String) -> Option<(&Option<reader::FileInfo>, &reader::FilterEntry)> {
        // Go over domains in ancestor order, checking all blocks for each ancestor.
        // For example check all files for 'www.example.com', then each again for 'example.com'.
        // This allows file B with 'www.example.com' to take precedence over file A with 'example.com'


@@ 145,14 154,14 @@ impl Filter {
            for override_entry in &self.overrides {
                match override_entry.get(&domain) {
                    // Found in an override file: Tell upstream to let it through or use provided override value
                    Some(entry) => return Some((override_entry.info(), entry)),
                    Some(entry) => return Some((&override_entry.info, entry)),
                    None => {}
                }
            }
            for block in &self.blocks {
                match block.get(&domain) {
                    // Found in block: Tell upstream to block it or use filter-provided override
                    Some(entry) => return Some((block.info(), entry)),
                    Some(entry) => return Some((&block.info, entry)),
                    None => {}
                }
            }


@@ 203,18 212,22 @@ async fn update_url(
    }
}

fn upsert_entries(entries: &mut Vec<reader::FileEntries>, new_entry: reader::FileEntries) {
    // Before adding new entry, check for existing entry to be replaced/updated.
    for i in 0..entries.len() {
        let entry = entries.get(i).expect("incoherent vector size");
        if entry.info.local_path == new_entry.info.local_path {
            // Delete or replace existing version
            if new_entry.is_empty() {
                entries.remove(i);
            } else {
                entries.insert(i, new_entry);
fn upsert_entries(entries: &mut Vec<reader::FilterEntries>, new_entry: reader::FilterEntries) {
    if let Some(new_file_info) = &new_entry.info {
        // Before adding a new file entry, check for an existing file entry to be replaced/updated.
        for i in 0..entries.len() {
            let entry = entries.get(i).expect("incoherent vector size");
            if let Some(existing_file_info) = &entry.info {
                if existing_file_info.local_path == new_file_info.local_path {
                    // Delete or replace existing version
                    if new_entry.is_empty() {
                        entries.remove(i);
                    } else {
                        entries.insert(i, new_entry);
                    }
                    return;
                }
            }
            return;
        }
    }
    // Add new entry

M src/filter/reader.rs => src/filter/reader.rs +65 -47
@@ 22,11 22,10 @@ pub enum FilterType {
    BLOCK,
}

#[derive(Debug)]
#[derive(Clone, Debug)]
pub struct FileInfo {
    pub source_path: String,
    pub local_path: String,
    pub filter_type: FilterType,
}

/// If this entry is found for a host, then:


@@ 36,30 35,32 @@ pub struct FileInfo {
/// But in that case would you be putting custom host entries locally if you had a DNS server with the right information available upstream?
/// Therefore we explicitly do NOT support misc record types like MX and SRV for hostnames that have a filter entry.
#[derive(Clone, Debug)]
pub struct FileEntry {
    pub line_num: usize,
pub struct FilterEntry {
    /// Line of the filter file that produced this entry, or `None` for hardcoded entries,
    /// which do not come from a file.
    pub line_num: Option<usize>,
    /// IPv4 destination to answer with, if one was provided.
    pub dest_ipv4: Option<Ipv4Addr>,
    /// IPv6 destination to answer with, if one was provided.
    pub dest_ipv6: Option<Ipv6Addr>,
}

pub struct FileEntries {
    pub info: FileInfo,
    entries: HashMap<String, FileEntry>,
/// A set of per-host filter entries, either read from a single file or hardcoded.
pub struct FilterEntries {
    /// Whether these entries act as overrides or as blocks.
    pub filter_type: FilterType,
    /// The file these entries were read from; `None` for hardcoded entries.
    pub info: Option<FileInfo>,
    /// The entries themselves, keyed by hostname.
    entries: HashMap<String, FilterEntry>,
}

impl FileEntries {
    fn new(info: FileInfo) -> FileEntries {
        FileEntries {
impl FilterEntries {
    /// Creates an empty entry set of the given type.
    ///
    /// `info` identifies the file the entries will be read from, or is `None` when the
    /// entries do not come from a file.
    fn new(filter_type: FilterType, info: Option<FileInfo>) -> FilterEntries {
        let entries = HashMap::new();
        FilterEntries {
            filter_type,
            info,
            entries,
        }
    }

    fn add_ipv4(self: &mut FileEntries, line_num: usize, host: String, dest: Option<Ipv4Addr>) {
    fn add_ipv4_line(self: &mut FilterEntries, line_num: usize, host: String, dest: Option<Ipv4Addr>) {
        // If the entry is already present, keep it and just update the ipv4 value.
        // This is mainly for the case of both ipv4+ipv6 entries in the same file.
        let initial_val = FileEntry {
            line_num,
        let initial_val = FilterEntry {
            line_num: Some(line_num),
            dest_ipv4: dest,
            dest_ipv6: None,
        };


@@ 68,15 69,15 @@ impl FileEntries {
        // This may be inaccurate if the file has both IPv4 and IPv6 entries for a given hostname on different
        // lines in the same file, for example with "localhost" in /etc/hosts. But this is a big corner case
        // and isn't worth the complexity to deal with multiple per-protocol line numbers.
        map_val.line_num = line_num;
        map_val.line_num = Some(line_num);
        map_val.dest_ipv4 = dest;
    }

    fn add_ipv6(self: &mut FileEntries, line_num: usize, host: String, dest: Option<Ipv6Addr>) {
    fn add_ipv6_line(self: &mut FilterEntries, line_num: usize, host: String, dest: Option<Ipv6Addr>) {
        // If the entry is already present, keep it and just update the ipv6 value.
        // This is mainly for the case of both ipv4+ipv6 entries in the same file.
        let initial_val = FileEntry {
            line_num,
        let initial_val = FilterEntry {
            line_num: Some(line_num),
            dest_ipv4: None,
            dest_ipv6: dest,
        };


@@ 85,53 86,70 @@ impl FileEntries {
        // This may be inaccurate if the file has both IPv4 and IPv6 entries for a given hostname on different
        // lines in the same file, for example with "localhost" in /etc/hosts. But this is a big corner case
        // and isn't worth the complexity to deal with multiple per-protocol line numbers.
        map_val.line_num = line_num;
        map_val.line_num = Some(line_num);
        map_val.dest_ipv6 = dest;
    }

    pub fn get(self: &FileEntries, host: &String) -> Option<&FileEntry> {
    /// Registers `host` as a hardcoded block.
    ///
    /// The stored entry carries no line number (it does not originate from a filter file)
    /// and no destination addresses. An entry already stored for `host` is replaced.
    fn add_hardcoded_block(self: &mut FilterEntries, host: String) {
        let blocked = FilterEntry {
            line_num: None,
            dest_ipv4: None,
            dest_ipv6: None,
        };
        self.entries.insert(host, blocked);
    }

    /// Looks up the entry stored under exactly `host`, returning `None` if there is none.
    pub fn get(self: &FilterEntries, host: &String) -> Option<&FilterEntry> {
        self.entries.get(host)
    }

    pub fn is_empty(self: &FileEntries) -> bool {
    /// Returns `true` when no host entries are stored.
    pub fn is_empty(self: &FilterEntries) -> bool {
        self.entries.is_empty()
    }
}

    pub fn info(self: &FileEntries) -> &FileInfo {
        &self.info
/// Builds a `BLOCK` entry set, with no backing file, from the given hardcoded hostnames.
///
/// Each name in `block_names` gets a hardcoded block entry. The current implementation
/// always returns `Ok`.
pub fn block_hardcoded(block_names: &[&str]) -> Result<FilterEntries> {
    let mut hardcoded = FilterEntries::new(FilterType::BLOCK, None);
    block_names
        .iter()
        .for_each(|host| hardcoded.add_hardcoded_block(host.to_string()));
    Ok(hardcoded)
}

/// Reads an override or block file from disk, streaming the list of entries
pub fn read(info: FileInfo) -> Result<FileEntries> {
/// Reads an override or block file from disk, returning a parsed list of entries
pub fn read(filter_type: FilterType, info: FileInfo) -> Result<FilterEntries> {
    let path_str = info.local_path.clone();
    let path = Path::new(&path_str);
    let file = File::open(path).with_context(|| format!("Failed to open file {:?}", info))?;
    if path::is_zstd_extension(path) {
        read_imp(info, zstd::stream::Decoder::new(file)?)
        read_imp(filter_type, info, zstd::stream::Decoder::new(file)?)
    } else {
        read_imp(info, file)
        read_imp(filter_type, info, file)
    }
}

fn read_imp<T: Read>(info: FileInfo, file: T) -> Result<FileEntries> {
fn read_imp<T: Read>(filter_type: FilterType, info: FileInfo, file: T) -> Result<FilterEntries> {
    let mut reader = BufReader::new(file);
    let mut buf = String::new();
    let mut line_num = 0;
    let mut file_entries = FileEntries::new(info);
    let mut entries = FilterEntries::new(filter_type, Some(info.clone()));
    loop {
        line_num += 1;
        let len = reader
            .read_line(&mut buf)
            .with_context(|| format!("Failed to read file {:?}", file_entries.info))?;
            .with_context(|| format!("Failed to read file {:?}", info))?;
        if len == 0 {
            return Ok(file_entries); // EOF
            return Ok(entries); // EOF
        } else {
            match handle_line(&buf, line_num, &mut file_entries) {
            match handle_line(&buf, line_num, &mut entries, &info) {
                Ok(()) => {}
                Err(e) => warn!(
                    "Failed to parse {:?} line {}: {}",
                    file_entries.info, line_num, e
                    info, line_num, e
                ),
            };
            buf.clear();


@@ 139,7 157,7 @@ fn read_imp<T: Read>(info: FileInfo, file: T) -> Result<FileEntries> {
    }
}

fn handle_line(line: &str, line_num: usize, out: &mut FileEntries) -> Result<()> {
fn handle_line(line: &str, line_num: usize, out: &mut FilterEntries, info: &FileInfo) -> Result<()> {
    // Cut out any comment from the line before tokenizing words
    let mut words = match line.find('#') {
        Some(comment_start) => line[..comment_start].split_ascii_whitespace(),


@@ 166,19 184,19 @@ fn handle_line(line: &str, line_num: usize, out: &mut FileEntries) -> Result<()>
                            let out_dest = validate_filter(
                                ipv6_dest,
                                ipv6_dest.is_loopback(),
                                &out.info,
                                &out,
                                line_num,
                            )?;
                            out.add_ipv6(
                            out.add_ipv6_line(
                                line_num,
                                validate_host(second, &out.info, line_num)?,
                                validate_host(second, &info, line_num)?,
                                out_dest,
                            );
                            // Pass any remaining host mappings on the line as well
                            for word in words {
                                out.add_ipv6(
                                out.add_ipv6_line(
                                    line_num,
                                    validate_host(word, &out.info, line_num)?,
                                    validate_host(word, &info, line_num)?,
                                    out_dest,
                                );
                            }


@@ 194,19 212,19 @@ fn handle_line(line: &str, line_num: usize, out: &mut FileEntries) -> Result<()>
                            let out_dest = validate_filter(
                                ipv4_dest,
                                ipv4_dest.is_loopback(),
                                &out.info,
                                &out,
                                line_num,
                            )?;
                            out.add_ipv4(
                            out.add_ipv4_line(
                                line_num,
                                validate_host(second, &out.info, line_num)?,
                                validate_host(second, &info, line_num)?,
                                out_dest,
                            );
                            // Pass any remaining host mappings on the line as well
                            for word in words {
                                out.add_ipv4(
                                out.add_ipv4_line(
                                    line_num,
                                    validate_host(word, &out.info, line_num)?,
                                    validate_host(word, &info, line_num)?,
                                    out_dest,
                                );
                            }


@@ 216,7 234,7 @@ fn handle_line(line: &str, line_num: usize, out: &mut FileEntries) -> Result<()>
                }
                // No second word: blocklist style, no destination
                None => {
                    out.add_ipv4(line_num, validate_host(first, &out.info, line_num)?, None);
                    out.add_ipv4_line(line_num, validate_host(first, &info, line_num)?, None);
                    Ok(())
                }
            }


@@ 229,10 247,10 @@ fn handle_line(line: &str, line_num: usize, out: &mut FileEntries) -> Result<()>
fn validate_filter<T: Display>(
    ip_dest: T,
    is_loopback: bool,
    info: &FileInfo,
    entries: &FilterEntries,
    line_num: usize,
) -> Result<Option<T>> {
    match info.filter_type {
    match entries.filter_type {
        FilterType::BLOCK => {
            // If we're looking at what's supposed to be a block, make sure that any IPs
            // provided are LOCAL/LOOPBACK ONLY. Otherwise we risk the possibility of a


@@ 242,7 260,7 @@ fn validate_filter<T: Display>(
            // or the destination should be something like 127.0.0.1.
            // Meanwhile things declared as overrides do not have this restriction.
            if !is_loopback {
                bail!("POSSIBLE DNS HIJACK ATTEMPT BY BLOCK SOURCE: Invalid non-local 'block' IP in {:?} line {}: {}", info, line_num, ip_dest);
                bail!("POSSIBLE DNS HIJACK ATTEMPT BY BLOCK SOURCE: Invalid non-local 'block' IP in {:?} line {}: {}", entries.info, line_num, ip_dest);
            }

            // For blocks, just omit the (local) destination IP since it's a stub value anyway.

M src/lookup.rs => src/lookup.rs +11 -7
@@ 39,7 39,7 @@ impl Lookup {
        if let Some(request) = DNSMessageDecoder::new().decode(packet_buffer)? {
            debug!("Incoming request: {}", request);
            if let Some((question, request_info)) = get_question(&request)? {
                let filter_result: Option<(String, reader::FileEntry)>;
                let filter_result: Option<(String, reader::FilterEntry)>;
                match self.filter.lock() {
                    Err(e) => bail!("Failed to lock query filter: {:?}", e),
                    Ok(filter_locked) => {


@@ 48,7 48,11 @@ impl Lookup {
                            filter_locked
                            .check(&request_info.name)
                            .map(|(file_info, file_entry)| {
                                (file_info.source_path.clone(), (*file_entry).clone())
                                if let Some(f) = file_info {
                                    (f.source_path.clone(), (*file_entry).clone())
                                } else {
                                    (String::new(), (*file_entry).clone())
                                }
                            });
                    }
                }


@@ 104,12 108,12 @@ fn write_filter_response(
    question: &Question,
    opt: &Option<OPT>,
    filter_source: &String,
    entry: &reader::FileEntry,
    entry: &reader::FilterEntry,
) -> Result<()> {
    if let (None, None) = (entry.dest_ipv4, entry.dest_ipv6) {
        // Return blocked domain
        debug!(
            "Got block entry for {} from {} line {}: dest=NXDOMAIN",
            "Got block entry for {} from {} line {:?}: dest=NXDOMAIN",
            request_info.name, filter_source, entry.line_num
        );
        ENCODER.encode_local_response(


@@ 124,7 128,7 @@ fn write_filter_response(
    } else if request_info.resource_type == enums_generated::ResourceType::A {
        // Return configured IPv4/A override
        debug!(
            "Got override {:?} entry for {} from {} line {}: dest={:?}",
            "Got override {:?} entry for {} from {} line {:?}: dest={:?}",
            request_info.resource_type,
            request_info.name,
            filter_source,


@@ 143,7 147,7 @@ fn write_filter_response(
    } else if request_info.resource_type == enums_generated::ResourceType::AAAA {
        // Return configured IPv6/AAAA override
        debug!(
            "Got override {:?} entry for {} from {} line {}: dest={:?}",
            "Got override {:?} entry for {} from {} line {:?}: dest={:?}",
            request_info.resource_type,
            request_info.name,
            filter_source,


@@ 165,7 169,7 @@ fn write_filter_response(
        // But if you had an upstream server with the right information, why would you be putting custom host entries locally?
        // Therefore we explicitly do NOT support misc record types like MX and SRV for hostnames that have an override entry.
        debug!(
            "Got override {:?} entry for {} from {} line {}: dest=NONE",
            "Got override {:?} entry for {} from {} line {:?}: dest=NONE",
            request_info.resource_type, request_info.name, filter_source, entry.line_num
        );
        ENCODER.encode_local_response(

M src/runner.rs => src/runner.rs +11 -0
@@ 18,6 18,14 @@ use crate::{cache, client, config, listen_tcp, listen_udp, lookup};
/// TCP size header is 16 bits, so max theoretical size is 64k
static MAX_TCP_BYTES: u16 = 65535;

/// Hardcoded hostnames that we should block for technical reasons.
///
/// References inside a `static` item are implicitly `'static`, so the lifetime is not
/// spelled out in the type.
static HARDCODED_BLOCKED_HOSTS: &[&str] = &[
    // Placeholder domain for users to check that Kapiti is working
    "test-blocked.kapiti.io",
    // Firefox canary domain.
    // See https://support.mozilla.org/en-US/kb/canary-domain-use-application-dnsnet
    "use-application-dns.net",
];

/// Runs the server. Separate from main.rs to simplify testing in benchmarks
pub struct Runner {
    config: config::Config,


@@ 146,6 154,9 @@ impl Runner {
        // - refrain from killing the process if a download fails or something
        let resolver = client::upstream::parse_upstreams(cache_tx.clone(), &self.config.upstreams)?;
        let mut filter = filter::Filter::new(self.storage_dir.join("filters"), resolver)?;
        // Set up the hardcoded values first - for now they take priority over any manual configuration.
        // There isn't a good reason for a user to override a Kapiti test domain, for example.
        filter.set_hardcoded_block(HARDCODED_BLOCKED_HOSTS.into())?;
        // TODO(#30): Implement advanced filter blocks with applies_to support, and with allow support
        for (_name, conf) in &self.config.filters {
            // Allow these path lists to be unset/empty