~nickbp/kapiti

792069fc54df03014f04de2bbccafb3057145e7f — Nick Parker 1 year, 7 months ago 564d9e4
Refactor filter handling a bit, add support for adblock-style domain filters
4 files changed, 1044 insertions(+), 303 deletions(-)

M src/filter/filter.rs
M src/filter/reader.rs
M src/lookup.rs
M src/runner.rs
M src/filter/filter.rs => src/filter/filter.rs +88 -59
@@ 2,7 2,7 @@ use std::path::{Path, PathBuf};
use std::vec::Vec;

use anyhow::{Context, Result};
use hyper::Client;
use hyper::{Client, Uri};
use sha2::{Digest, Sha256};

use crate::filter::{path, reader, updater};


@@ 51,57 51,61 @@ impl<'a> Iterator for DomainParentIter<'a> {
}

pub struct Filter {
    overrides: Vec<reader::FilterEntries>,
    blocks: Vec<reader::FilterEntries>,
    override_files: Vec<reader::FilterFile>,
    block_files: Vec<reader::FilterFile>,
}

impl Filter {
    pub fn new() -> Filter {
        Filter {
            overrides: vec![],
            blocks: vec![],
            override_files: vec![],
            block_files: vec![],
        }
    }

    pub fn update_entries(self: &mut Filter, entrieses: Vec<reader::FilterEntries>) {
        for entries in entrieses {
            match entries.filter_type {
                reader::FilterType::BLOCK => upsert_entries(&mut self.blocks, entries),
                reader::FilterType::OVERRIDE => upsert_entries(&mut self.overrides, entries),
            }
    /// Hands each of the provided override files to `upsert_entries`, in the order given,
    /// against this filter's list of override files.
    pub fn update_override_entries(self: &mut Filter, files: Vec<reader::FilterFile>) {
        files
            .into_iter()
            .for_each(|file| upsert_entries(&mut self.override_files, file));
    }

    /// Hands each of the provided block files to `upsert_entries`, in the order given,
    /// against this filter's list of block files.
    pub fn update_block_entries(self: &mut Filter, files: Vec<reader::FilterFile>) {
        files
            .into_iter()
            .for_each(|file| upsert_entries(&mut self.block_files, file));
    }

    pub fn set_hardcoded_block(self: &mut Filter, block_names: &[&str]) -> Result<()> {
        let hardcoded_entries = reader::block_hardcoded(block_names)?;
        upsert_entries(&mut self.blocks, hardcoded_entries);
        upsert_entries(&mut self.block_files, hardcoded_entries);
        Ok(())
    }

    /// Search all filters for the domain in ancestor order.
    /// For example check all filters for 'www.example.com', then all again for 'example.com'.
    /// This allows file B with 'www.example.com' to take precedence over file A with 'example.com'
    /// Meanwhile if two files mention the exact same name then the first file in the list wins.
    /// So if file A says "127.0.0.1" and file B says "172.16.0.1" then "127.0.0.1" wins.
    pub fn check(
        self: &Filter,
        host: &String,
    ) -> Option<(&Option<reader::FileInfo>, &reader::FilterEntry)> {
        // Go over domains in ancestor order, checking all blocks for each ancestor.
        // For example check all files for 'www.example.com', then each again for 'example.com'.
        // This allows file B with 'www.example.com' to take precedence over file A with 'example.com'

        // Meanwhile if two files mention the exact same domain then the first file in the list wins.
        // So if file A says "127.0.0.1" and file B says "172.16.0.1" then "127.0.0.1" wins.

        // NOTE: wildcards like 'foo*.example.com', 'foo.*.example.com' are not supported,
        // and it'd probably be too much trouble to deal with trying to support them.
        // Meanwhile due to how we parse the domains, wildcards like '*.example.com' are already supported.
        for domain_str in DomainParentIter::new(&host) {
            let domain = domain_str.to_string();
            for override_entry in &self.overrides {
                match override_entry.get(&domain) {
            for override_file in &self.override_files {
                match override_file.content.get(&domain) {
                    // Found in an override file: Tell upstream to let it through or use provided override value
                    Some(entry) => return Some((&override_entry.info, entry)),
                    Some(entry) => return Some((&override_file.info, entry)),
                    None => {}
                }
            }
            for block in &self.blocks {
                match block.get(&domain) {
            for block_file in &self.block_files {
                match block_file.content.get(&domain) {
                    // Found in block: Tell upstream to block it or use filter-provided override
                    Some(entry) => return Some((&block.info, entry)),
                    Some(entry) => return Some((&block_file.info, entry)),
                    None => {}
                }
            }


@@ 113,51 117,76 @@ impl Filter {

/// Returns the local path where the file was downloaded,
/// and whether the file was updated (true) or the update was skipped (false)
pub async fn update_url(
pub async fn update_if_url(
    fetch_client: &Client<hyper_smol::SmolConnector>,
    filters_dir: &PathBuf,
    uri_string: &String,
    filter_path_or_url: &String,
    timeout_ms: u64,
) -> Result<(String, bool)> {
    let fetcher = fetcher::Fetcher::new(10 * 1024 * 1024, None);
    // We download files to the exact SHA of the URL string we were provided.
    // This is an easy way to avoid filename collisions in URLs: example1.com/hosts vs example2.com/hosts
    // If the user changes the URL string then that changes the SHA, perfect for "cache invalidation" purposes.
    let hosts_path_sha = Sha256::digest(uri_string.as_bytes());
    let download_path = Path::new(filters_dir).join(format!(
        "{:x}.sha256.{}",
        hosts_path_sha,
        path::ZSTD_EXTENSION
    ));
    let downloaded = updater::update_file(
        fetch_client,
        &fetcher,
        uri_string,
        download_path.as_path(),
        timeout_ms,
    )
    .await?;
    Ok((
        download_path
            .to_str()
            .with_context(|| format!("busted download path: {:?}", download_path))?
            .to_string(),
        downloaded,
    ))
) -> Result<(reader::FileInfo, bool)> {
    // Check if this is a file or URL
    if let Ok(filter_uri) = Uri::try_from(filter_path_or_url) {
        // Filesystem paths can get parsed as URLs with no scheme
        if filter_uri.scheme() == None {
            return Ok((
                reader::FileInfo {
                    source_path: filter_path_or_url.clone(),
                    local_path: filter_path_or_url.clone(),
                },
                false
            ))
        }

        let fetcher = fetcher::Fetcher::new(10 * 1024 * 1024, None);
        // We download files to the exact SHA of the URL string we were provided.
        // This is an easy way to avoid filename collisions in URLs: example1.com/hosts vs example2.com/hosts
        // If the user changes the URL string then that changes the SHA, perfect for "cache invalidation" purposes.
        let hosts_path_sha = Sha256::digest(filter_path_or_url.as_bytes());
        let download_path = Path::new(filters_dir).join(format!(
            "{:x}.sha256.{}",
            hosts_path_sha,
            path::ZSTD_EXTENSION
        ));
        let downloaded = updater::update_file(
            fetch_client,
            &fetcher,
            filter_path_or_url,
            download_path.as_path(),
            timeout_ms,
        )
            .await?;
        Ok((
            reader::FileInfo {
                source_path: filter_path_or_url.clone(),
                local_path: download_path
                    .to_str()
                    .with_context(|| format!("busted download path: {:?}", download_path))?
                    .to_string()
            },
            downloaded,
        ))
    } else {
        return Ok((
            reader::FileInfo {
                source_path: filter_path_or_url.clone(),
                local_path: filter_path_or_url.clone(),
            },
            false
        ))
    }
}

fn upsert_entries(entries: &mut Vec<reader::FilterEntries>, new_entry: reader::FilterEntries) {
    if let Some(new_file_info) = &new_entry.info {
fn upsert_entries(entries: &mut Vec<reader::FilterFile>, new_file: reader::FilterFile) {
    if let Some(new_file_info) = &new_file.info {
        // Before adding a new file entry, check for an existing file entry to be replaced/updated.
        for i in 0..entries.len() {
            let entry = entries.get(i).expect("incoherent vector size");
            if let Some(existing_file_info) = &entry.info {
                if existing_file_info.local_path == new_file_info.local_path {
                    // Delete or replace existing version
                    if new_entry.is_empty() {
                    if new_file.content.is_empty() {
                        entries.remove(i);
                    } else {
                        entries.insert(i, new_entry);
                        entries.insert(i, new_file);
                    }
                    return;
                }


@@ 165,8 194,8 @@ fn upsert_entries(entries: &mut Vec<reader::FilterEntries>, new_entry: reader::F
        }
    }
    // Add new entry
    if !new_entry.is_empty() {
        entries.push(new_entry);
    if !new_file.content.is_empty() {
        entries.push(new_file);
    }
}


M src/filter/reader.rs => src/filter/reader.rs +923 -176
@@ 1,72 1,135 @@
use anyhow::{bail, Context, Result};
use std::collections::HashMap;
use std::fmt::Display;
use std::fs::File;
use std::io::{BufRead, BufReader, Read};
use std::net::{Ipv4Addr, Ipv6Addr};
use std::path::Path;
use std::str::FromStr;
use std::str::{FromStr, SplitAsciiWhitespace};

use tracing::warn;
use tracing::{trace, warn};

use crate::filter::path;

#[derive(Debug)]
pub enum FilterType {
    /// Hosts that should be redirected (any IP) OR let through to upstream (no IP).
    /// This type is also compatible with any '/etc/hosts' file.
    OVERRIDE,

    /// Hosts that should be blackholed.
    /// It is invalid for any entry to have a non-local IP - avoids domain hijacking by block providers.
    BLOCK,
/// A parsed filter paired with information about where it was loaded from.
pub struct FilterFile {
    /// Info about the source file/URL, if any.
    /// Filters built by `block_hardcoded` have no backing file and use `None` here.
    pub info: Option<FileInfo>,
    /// The filter content/logic itself
    pub content: FilterContent,
}

#[derive(Clone, Debug)]
pub struct FileInfo {
    /// The path or URL for this filter as provided in the config
    pub source_path: String,
    /// For remote files, this is where the cached copy is found locally.
    /// For local files, this is the same as source_path.
    pub local_path: String,
}

/// If this is entry is found for a host, then:
/// - ipv4 AND ipv6    BOTH None:   return NXDOMAIN for all record types (only if BOTH are None)
/// - ipv4 and/or ipv6 EITHER Some: return respective IP for A/AAAA, or "record not found". for all other record types return "record not found"
/// Note that it's a little ambiguous whether we should instead try going upstream when querying misc record types for hosts listed in filters,
/// But in that case would you be putting custom host entries locally if you had a DNS server with the right information available upstream?
/// Therefore we explicitly do NOT support misc record types like MX and SRV for hostnames that have a filter entry.
#[derive(Clone, Debug)]
/// Block filters:
///   If this entry is found for a host, then return NXDOMAIN for all record types.
/// Override filters:
///   If this entry is found for a host, then return the specified values for A records and/or AAAA records.
///   For all other record types return no record.
#[derive(Clone, Debug, PartialEq)]
pub struct FilterEntry {
    /// The line number from the original filter file where this entry was found.
    /// `None` for hardcoded entries, which do not come from a file.
    pub line_num: Option<usize>,

    /// An override value for returning the upstream result (for 'allow' entries)
    pub dest_upstream: bool,
    /// An override value for A records
    pub dest_ipv4: Option<Ipv4Addr>,
    /// An override value for AAAA records
    pub dest_ipv6: Option<Ipv6Addr>,
}

pub struct FilterEntries {
    pub filter_type: FilterType,
    pub info: Option<FileInfo>,
pub struct FilterContent {
    /// Mapping of hostnames to filters.
    /// Depending on the context, the filters can either be blocks or overrides.
    entries: HashMap<String, FilterEntry>,
}

impl FilterEntries {
    fn new(filter_type: FilterType, info: Option<FileInfo>) -> FilterEntries {
        FilterEntries {
            filter_type,
            info,
impl FilterContent {
    fn new() -> FilterContent {
        FilterContent {
            entries: HashMap::new(),
        }
    }

    fn add_ipv4_line(
        self: &mut FilterEntries,
    /// Returns true when no host entries have been added to this filter content.
    pub fn is_empty(&self) -> bool {
        self.entries.is_empty()
    }

    /// Returns the entry stored under exactly `host`, or `None` if there isn't one.
    /// This is a plain map lookup: no parent-domain matching is done here.
    pub fn get(&self, host: &str) -> Option<&FilterEntry> {
        self.entries.get(host)
    }

    /// Records an 'allow' entry for `host`: `dest_upstream` is set and no override IPs are stored.
    /// Any existing entry for the same host is replaced.
    ///
    /// # Errors
    /// Returns the error from `validate_host` if the host is rejected; nothing is stored in that case.
    fn add_allow_line(
        &mut self,
        source_info: &FileInfo,
        line_num: usize,
        host: &str,
    ) -> Result<()> {
        let validated = validate_host(source_info, line_num, host)?;
        let allow_entry = FilterEntry {
            line_num: Some(line_num),
            dest_upstream: true,
            dest_ipv4: None,
            dest_ipv6: None,
        };
        self.entries.insert(validated, allow_entry);
        Ok(())
    }

    fn add_block_line(
        &mut self,
        source_info: &FileInfo,
        line_num: usize,
        host: String,
        dest: Option<Ipv4Addr>,
    ) {
        host: &str,
    ) -> Result<()> {
        let host = validate_host(source_info, line_num, host)?;
        self.entries.insert(
            host,
            FilterEntry {
                line_num: Some(line_num),
                dest_upstream: false,
                dest_ipv4: None,
                dest_ipv6: None,
            },
        );
        Ok(())
    }

    /// Records a block entry for a hardcoded `host`.
    /// The entry has no line number since it doesn't come from a file.
    /// Any existing entry for the same host is replaced.
    fn add_block_hardcoded(&mut self, host: &str) {
        // Skip validation, assume it's fine...
        let hardcoded_entry = FilterEntry {
            line_num: None,
            dest_upstream: false,
            dest_ipv4: None,
            dest_ipv6: None,
        };
        self.entries.insert(host.to_string(), hardcoded_entry);
    }

    fn add_override_ipv4_line(
        &mut self,
        source_info: &FileInfo,
        line_num: usize,
        host: &str,
        dest: Ipv4Addr,
    ) -> Result<()> {
        let host = validate_host(source_info, line_num, host)?;
        // If the entry is already present, keep it and just update the ipv4 value.
        // This is mainly for the case of both ipv4+ipv6 entries in the same file.
        let initial_val = FilterEntry {
            line_num: Some(line_num),
            dest_ipv4: dest,
            dest_upstream: false,
            dest_ipv4: Some(dest),
            dest_ipv6: None,
        };
        let map_val = self.entries.entry(host).or_insert(initial_val);


@@ 75,21 138,25 @@ impl FilterEntries {
        // lines in the same file, for example with "localhost" in /etc/hosts. But this is a big corner case
        // and isn't worth the complexity to deal with multiple per-protocol line numbers.
        map_val.line_num = Some(line_num);
        map_val.dest_ipv4 = dest;
        map_val.dest_ipv4 = Some(dest);
        Ok(())
    }

    fn add_ipv6_line(
        self: &mut FilterEntries,
    fn add_override_ipv6_line(
        &mut self,
        source_info: &FileInfo,
        line_num: usize,
        host: String,
        dest: Option<Ipv6Addr>,
    ) {
        host: &str,
        dest: Ipv6Addr,
    ) -> Result<()> {
        let host = validate_host(source_info, line_num, host)?;
        // If the entry is already present, keep it and just update the ipv6 value.
        // This is mainly for the case of both ipv4+ipv6 entries in the same file.
        let initial_val = FilterEntry {
            line_num: Some(line_num),
            dest_upstream: false,
            dest_ipv4: None,
            dest_ipv6: dest,
            dest_ipv6: Some(dest),
        };
        let map_val = self.entries.entry(host).or_insert(initial_val);
        // Update line_num: just go with whatever line is last, since that's what we're using for the dest.


@@ 97,65 164,148 @@ impl FilterEntries {
        // lines in the same file, for example with "localhost" in /etc/hosts. But this is a big corner case
        // and isn't worth the complexity to deal with multiple per-protocol line numbers.
        map_val.line_num = Some(line_num);
        map_val.dest_ipv6 = dest;
        map_val.dest_ipv6 = Some(dest);
        Ok(())
    }
}

    /// Adds a hardcoded block entry for the specified host.
    fn add_hardcoded_block(self: &mut FilterEntries, host: String) {
        self.entries.insert(
            host,
            FilterEntry {
                line_num: None,
                dest_ipv4: None,
                dest_ipv6: None,
            },
        );
/// Builds an in-memory block filter from the provided hardcoded hostnames.
/// The result carries no `FileInfo` since there is no backing file.
pub fn block_hardcoded(block_names: &[&str]) -> Result<FilterFile> {
    let mut content = FilterContent::new();
    block_names
        .iter()
        .for_each(|name| content.add_block_hardcoded(name));
    Ok(FilterFile {
        info: None,
        content,
    })
}

    pub fn get(self: &FilterEntries, host: &String) -> Option<&FilterEntry> {
        self.entries.get(host)
/// Opens the override file at `info.local_path` and parses it into a `FilterFile`.
/// Paths accepted by `path::is_zstd_extension` are read through a zstd decoder,
/// everything else is read as-is.
///
/// # Errors
/// Fails if the file cannot be opened, if the zstd decoder cannot be created,
/// or if reading the content fails.
pub fn read_override(info: FileInfo) -> Result<FilterFile> {
    let location = info.local_path.clone();
    let local_file = Path::new(&location);
    let handle =
        File::open(local_file).with_context(|| format!("Failed to open file {:?}", info))?;
    if !path::is_zstd_extension(local_file) {
        return read_override_imp(info, handle);
    }
    read_override_imp(info, zstd::stream::Decoder::new(handle)?)
}

    pub fn is_empty(self: &FilterEntries) -> bool {
        self.entries.is_empty()
/// Parses override content line by line from `file`, returning the collected entries.
///
/// Line numbers start at 1. A line that fails to parse is logged as a warning and skipped,
/// so one bad line does not discard the rest of the file.
///
/// # Errors
/// Fails only if reading from `file` fails.
fn read_override_imp<T: Read>(info: FileInfo, file: T) -> Result<FilterFile> {
    let mut reader = BufReader::new(file);
    // One buffer reused for every line
    let mut buf = String::new();
    let mut line_num = 0;
    let mut content = FilterContent::new();
    loop {
        line_num += 1;
        let len = reader
            .read_line(&mut buf)
            .with_context(|| format!("Failed to read file {:?}", info))?;
        if len == 0 {
            // EOF
            break;
        }
        if let Err(e) = handle_override_line(&buf, line_num, &mut content, &info) {
            warn!("Failed to parse {:?} line {}: {}", info, line_num, e);
        }
        buf.clear();
    }
    // 'info' is owned and no longer borrowed here, so hand it over without cloning
    Ok(FilterFile {
        info: Some(info),
        content,
    })
}

/// Creates a list of entries from the provided hardcoded hostnames to block
pub fn block_hardcoded(block_names: &[&str]) -> Result<FilterEntries> {
    let mut entries = FilterEntries::new(FilterType::BLOCK, None);
    for name in block_names {
        entries.add_hardcoded_block(name.to_string());
/// Parses a single line from an override file and stores the resulting entries in `out`.
///
/// The only accepted format is the /etc/hosts-style rule `<ip> <host1> [host2 ... hostN]`,
/// where every listed host is mapped to the IP. A first word containing ':' is parsed as
/// IPv6, anything else as IPv4. Lines with no words left after `tokenize` are ignored.
///
/// # Errors
/// - The line has only one word: there is no override IP to go with the host.
/// - The IP fails to parse.
/// - Adding one of the hosts fails. Hosts earlier on the same line stay recorded.
fn handle_override_line(
    line: &str,
    line_num: usize,
    out: &mut FilterContent,
    info: &FileInfo,
) -> Result<()> {
    let mut words = tokenize(line);

    let first = match words.next() {
        Some(word) => word,
        // Blank line (possibly after stripping any comments)
        None => return Ok(()),
    };
    let second = match words.next() {
        Some(word) => word,
        None => {
            // This is an override file but there's nowhere for the override IP to go
            bail!(
                "Unexpected block-style entry in override rule {:?} line {}: {}",
                info,
                line_num,
                first
            );
        }
    };

    // Second word present: a hostname for hosts-style (and more hostnames may follow)
    let hosts = std::iter::once(second).chain(words);
    if first.contains(':') {
        // Looks like the IP is IPv6
        let ipv6_dest = Ipv6Addr::from_str(first).with_context(|| {
            format!(
                "Failed to parse IPv6 address in {:?} line {}: {}",
                info, line_num, first
            )
        })?;
        for host in hosts {
            out.add_override_ipv6_line(info, line_num, host, ipv6_dest)?;
        }
    } else {
        // Assume the IP is IPv4
        let ipv4_dest = Ipv4Addr::from_str(first).with_context(|| {
            format!(
                "Failed to parse IPv4 address in {:?} line {}: {}",
                info, line_num, first
            )
        })?;
        for host in hosts {
            out.add_override_ipv4_line(info, line_num, host, ipv4_dest)?;
        }
    }
    Ok(())
}

/// Reads an override or block file from disk, returning a parsed list of entries
pub fn read(filter_type: FilterType, info: FileInfo) -> Result<FilterEntries> {
/// Reads a block file from disk, returning a parsed list of entries
pub fn read_block(info: FileInfo) -> Result<FilterFile> {
    let path_str = info.local_path.clone();
    let path = Path::new(&path_str);
    let file = File::open(path).with_context(|| format!("Failed to open file {:?}", info))?;
    if path::is_zstd_extension(path) {
        read_imp(filter_type, info, zstd::stream::Decoder::new(file)?)
        read_block_imp(info, zstd::stream::Decoder::new(file)?)
    } else {
        read_imp(filter_type, info, file)
        read_block_imp(info, file)
    }
}

fn read_imp<T: Read>(filter_type: FilterType, info: FileInfo, file: T) -> Result<FilterEntries> {
fn read_block_imp<T: Read>(info: FileInfo, file: T) -> Result<FilterFile> {
    let mut reader = BufReader::new(file);
    let mut buf = String::new();
    let mut line_num = 0;
    let mut entries = FilterEntries::new(filter_type, Some(info.clone()));
    let mut content = FilterContent::new();
    loop {
        line_num += 1;
        let len = reader
            .read_line(&mut buf)
            .with_context(|| format!("Failed to read file {:?}", info))?;
        if len == 0 {
            return Ok(entries); // EOF
            // EOF
            return Ok(FilterFile{
                info: Some(info.clone()),
                content,
            });
        } else {
            match handle_line(&buf, line_num, &mut entries, &info) {
            match handle_block_line(&buf, line_num, &mut content, &info) {
                Ok(()) => {}
                Err(e) => warn!("Failed to parse {:?} line {}: {}", info, line_num, e),
            };


@@ 164,124 314,93 @@ fn read_imp<T: Read>(filter_type: FilterType, info: FileInfo, file: T) -> Result
    }
}

fn handle_line(
fn handle_block_line(
    line: &str,
    line_num: usize,
    out: &mut FilterEntries,
    out: &mut FilterContent,
    info: &FileInfo,
) -> Result<()> {
    // Cut out any comment from the line before tokenizing words
    let mut words = match line.find('#') {
        Some(comment_start) => line[..comment_start].split_ascii_whitespace(),
        None => line.split_ascii_whitespace(),
    };

    // Support both formats:
    // - Hosts via override: <ip> <host1> [host2] [...]
    // - Block or override:  <host>
    match words.next() {
        // First word: either an IP for hosts-style override, or a hostname for block-style
        Some(first) => {
            match words.next() {
                // Second word: a hostname for hosts-style (and more hostnames may follow)
                Some(second) => {
                    match first.find(':') {
                        Some(_) => {
                            let ipv6_dest = Ipv6Addr::from_str(first).with_context(|| {
                                format!(
                                    "Failed to parse IPv6 address in {:?} line {}: {}",
                                    out.info, line_num, first
                                )
                            })?;
                            let out_dest = validate_filter(
                                ipv6_dest,
                                ipv6_dest.is_loopback(),
                                &out,
                                line_num,
                            )?;
                            out.add_ipv6_line(
                                line_num,
                                validate_host(second, &info, line_num)?,
                                out_dest,
                            );
                            // Pass any remaining host mappings on the line as well
                            for word in words {
                                out.add_ipv6_line(
                                    line_num,
                                    validate_host(word, &info, line_num)?,
                                    out_dest,
                                );
                            }
                            Ok(())
                        }
                        None => {
                            let ipv4_dest = Ipv4Addr::from_str(first).with_context(|| {
                                format!(
                                    "Failed to parse IPv4 address in {:?} line {}: {}",
                                    out.info, line_num, first
                                )
                            })?;
                            let out_dest = validate_filter(
                                ipv4_dest,
                                ipv4_dest.is_loopback(),
                                &out,
                                line_num,
                            )?;
                            out.add_ipv4_line(
                                line_num,
                                validate_host(second, &info, line_num)?,
                                out_dest,
                            );
                            // Pass any remaining host mappings on the line as well
                            for word in words {
                                out.add_ipv4_line(
                                    line_num,
                                    validate_host(word, &info, line_num)?,
                                    out_dest,
                                );
                            }
                            Ok(())
                        }
                    }
                }
                // No second word: blocklist style, no destination
                None => {
                    out.add_ipv4_line(line_num, validate_host(first, &info, line_num)?, None);
                    Ok(())
    let mut words = tokenize(line);

    // Support these formats:
    // - /etc/hosts-style rule: '<ip> <host1> [host2 ... hostN]'
    // - Adblock domain rule:   '||<host>^' and similar (see https://adblockplus.org/filter-cheatsheet)
    // - Hostname block:        '<host>'
    if let Some(first) = words.next() {
        if let Some(second) = words.next() {
            // Second word present: a hostname for hosts-style (and more hostnames may follow)
            // Skip parsing the destination IP and just block the host(s).
            trace!("{} block1:  {}", line_num, first);
            out.add_block_line(&info, line_num, second)?;
            for word in words {
                trace!("{} blockN:  {}", line_num, first);
                out.add_block_line(&info, line_num, word)?;
            }
            Ok(())
        } else {
            // No second word, assume it's an adblock or blocklist style for a hostname, no destination
            // TODO: add explicit test cases for the below listed cases
            if first.starts_with("@@") {
                // Looks like an adblock 'allow' rule (frequently malformed):
                // - '@@||<host>^|'
                // - '@@||<host>^'
                // - '@@|<host>^|'
                // - '@@|<host>^'
                // - '@@-<host>^'
                // (Check this first since the following case is 'looser')
                let first_trim = first
                    .trim_start_matches(|c| char::is_ascii_punctuation(&c))
                    .trim_end_matches(|c| char::is_ascii_punctuation(&c));
                trace!("{} allow:   {} -> {}", line_num, first, first_trim);
                out.add_allow_line(&info, line_num, first_trim)?;
            } else if first.starts_with("||") || first.starts_with("://") || first.ends_with("^") {
                // Looks like an adblock 'block' rule (frequently malformed):
                // - '||<host>^'
                // - '||<host>.'
                // - '||<host>'
                // - '://<host>^'
                // - '://<host>'
                // - '://*.<host>' (wildcard automatically trimmed, does what we want)
                // - '.<host>^'
                // - '||<host>^$important'
                // - '||<host>$important'
                let mut first_trim = first;
                if first_trim.ends_with("$important") {
                    // remove any '$important' from end before trimming any punctuation
                    first_trim = &first_trim[0..first.len()-10];
                }
                first_trim = first_trim
                    .trim_start_matches(|c| char::is_ascii_punctuation(&c))
                    .trim_end_matches(|c| char::is_ascii_punctuation(&c));
                trace!("{} adblock: {} -> {}", line_num, first, first_trim);
                out.add_block_line(&info, line_num, &first_trim)?;
            } else {
                // Give up and assume that it's just a standalone hostname: '<host>'
                trace!("{} UNKNOWN: {}", line_num, first);
                out.add_block_line(&info, line_num, first)?;
            }
            Ok(())
        }
    } else {
        // Blank line (possibly after stripping any comments)
        None => Ok(()),
        Ok(())
    }
}

fn validate_filter<T: Display>(
    ip_dest: T,
    is_loopback: bool,
    entries: &FilterEntries,
    line_num: usize,
) -> Result<Option<T>> {
    match entries.filter_type {
        FilterType::BLOCK => {
            // If we're looking at what's supposed to be a block, make sure that any IPs
            // provided are LOCAL/LOOPBACK ONLY. Otherwise we risk the possibility of a
            // "block" maliciously setting other IPs as a sort of DNS hijacking.

            // In other words, block entries MUST either mention NO destination,
            // or the destination should be something like 127.0.0.1.
            // Meanwhile things declared as overrides do not have this restriction.
            if !is_loopback {
                bail!("POSSIBLE DNS HIJACK ATTEMPT BY BLOCK SOURCE: Invalid non-local 'block' IP in {:?} line {}: {}", entries.info, line_num, ip_dest);
fn tokenize(line: &str) -> SplitAsciiWhitespace {
    // Cut out any comment from the line before tokenizing words
    match (line.find('!'), line.find('#')) {
        (Some(comment_start_excl), Some(comment_start_hash)) => {
            // Whichever one comes first is the start of the comment
            if comment_start_excl < comment_start_hash {
                line[..comment_start_excl].split_ascii_whitespace()
            } else {
                line[..comment_start_hash].split_ascii_whitespace()
            }

            // For blocks, just omit the (local) destination IP since it's a stub value anyway.
            Ok(None)
        }
        FilterType::OVERRIDE => {
            // Declared as a override: No loopback requirement, and pass any requested destination upstream.
            Ok(Some(ip_dest))
        }
        },
        (None, Some(comment_start)) => line[..comment_start].split_ascii_whitespace(),
        (Some(comment_start), None) => line[..comment_start].split_ascii_whitespace(),
        (None, None) => line.split_ascii_whitespace(),
    }
}



@@ 291,7 410,7 @@ fn validate_filter<T: Display>(
/// HOWEVER, in reality both of these seem to be valid:
/// - hostnames that start with a number (in the subdomain)
/// - hostnames that contain '_'
fn validate_host(host: &str, source_info: &FileInfo, line_num: usize) -> Result<String> {
fn validate_host(source_info: &FileInfo, line_num: usize, host: &str) -> Result<String> {
    if host.len() < 2 {
        bail!(
            "Invalid host of length {} in {:?} line {}: {}",


@@ 301,6 420,16 @@ fn validate_host(host: &str, source_info: &FileInfo, line_num: usize) -> Result<
            host
        );
    }
    if host.len() > 253 {
        // Don't log the host, in case it's REALLY long
        bail!(
            "Invalid host of length {} in {:?} line {}",
            host.len(),
            source_info,
            line_num
        );
    }

    for (idx, c) in host.char_indices() {
        if idx == host.len() - 1 {
            // Last char


@@ 324,3 453,621 @@ fn validate_host(host: &str, source_info: &FileInfo, line_num: usize) -> Result<
    }
    Ok(host.to_string())
}

#[cfg(test)]
mod tests {
    use super::*;

    /*
    #[test]
    fn actual_file() {
        // Put sample file named 'filter.txt' in repo root:
        let path = "filter.txt";
        let file_info = FileInfo {
            source_path: path.to_string(),
            local_path: path.to_string(),
        };
        let filter_file = read_block(file_info).unwrap();
        assert_eq!(5, filter_file.content.entries.len());
    }
    */

    /// Block lines may carry `#` comments: a comment-only line is accepted and adds
    /// nothing, while a trailing comment (with or without a preceding space) is cut
    /// off before the hosts on the line are recorded.
    #[test]
    fn comments_hash() {
        let mut content = FilterContent::new();
        let file_info = FileInfo {
            source_path: "testsrc".to_string(),
            local_path: "testlocal".to_string(),
        };

        // (input line, line number reported to the parser)
        let lines = [
            ("# ignored comment", 4),
            ("foo.com # ignored comment", 5),
            ("foo.biz# ignored comment", 6),
            ("1.2.3.4 foo.nz foo.co.nz   # ignored comment", 7),
            ("||foo.geek.nz^   # ignored comment", 8),
        ];
        for &(line, line_num) in lines.iter() {
            assert!(
                handle_block_line(line, line_num, &mut content, &file_info).is_ok(),
                "line {} should be accepted: {:?}",
                line_num,
                line
            );
        }

        // Same check as in comments_excl: the comment-only line contributes no entry,
        // and each of the five hosts contributes exactly one.
        assert_eq!(5, content.entries.len());

        // (host, line number the entry should be attributed to)
        let expected = [
            ("foo.com", 5),
            ("foo.biz", 6),
            ("foo.nz", 7),
            ("foo.co.nz", 7),
            ("foo.geek.nz", 8),
        ];
        for &(host, line_num) in expected.iter() {
            let entry = content
                .get(host)
                .unwrap_or_else(|| panic!("missing entry for {}", host));
            assert_eq!(Some(line_num), entry.line_num, "line_num for {}", host);
            assert!(!entry.dest_upstream, "dest_upstream for {}", host);
            assert_eq!(None, entry.dest_ipv4, "dest_ipv4 for {}", host);
            assert_eq!(None, entry.dest_ipv6, "dest_ipv6 for {}", host);
        }

        // Only exact hosts are stored: no subdomains or lookalikes.
        assert!(content.get("www.foo.com").is_none());
        assert!(content.get("foo2.com").is_none());
    }

    /// Block lines may carry `!` comments: a comment-only line is accepted and adds
    /// nothing, while a trailing comment (with or without a preceding space) is cut
    /// off before the hosts on the line are recorded.
    #[test]
    fn comments_excl() {
        let mut content = FilterContent::new();
        let file_info = FileInfo {
            source_path: "testsrc".to_string(),
            local_path: "testlocal".to_string(),
        };

        // (input line, line number reported to the parser)
        let lines = [
            ("! ignored comment", 4),
            ("foo.com ! ignored comment", 5),
            ("foo.biz! ignored comment", 6),
            ("1.2.3.4 foo.nz foo.co.nz   ! ignored comment", 7),
            ("||foo.geek.nz^   ! ignored comment", 8),
        ];
        for &(line, line_num) in lines.iter() {
            assert!(
                handle_block_line(line, line_num, &mut content, &file_info).is_ok(),
                "line {} should be accepted: {:?}",
                line_num,
                line
            );
        }

        assert_eq!(5, content.entries.len());

        // (host, line number the entry should be attributed to)
        let expected = [
            ("foo.com", 5),
            ("foo.biz", 6),
            ("foo.nz", 7),
            ("foo.co.nz", 7),
            ("foo.geek.nz", 8),
        ];
        for &(host, line_num) in expected.iter() {
            let entry = content
                .get(host)
                .unwrap_or_else(|| panic!("missing entry for {}", host));
            assert_eq!(Some(line_num), entry.line_num, "line_num for {}", host);
            assert!(!entry.dest_upstream, "dest_upstream for {}", host);
            assert_eq!(None, entry.dest_ipv4, "dest_ipv4 for {}", host);
            assert_eq!(None, entry.dest_ipv6, "dest_ipv6 for {}", host);
        }

        // Only exact hosts are stored: no subdomains or lookalikes.
        assert!(content.get("www.foo.com").is_none());
        assert!(content.get("foo2.com").is_none());
    }

    /// Exercises host validation through `handle_block_line`: minimum and maximum
    /// host length, plus which characters are tolerated at the start and end of a host.
    #[test]
    fn bad_hosts() {
        let mut content = FilterContent::new();
        let file_info = FileInfo {
            source_path: "testsrc".to_string(),
            local_path: "testlocal".to_string(),
        };

        // (host line, whether the parser should accept it)
        let cases = [
            // Hosts shorter than two characters are rejected
            ("a", false),
            ("ab", true),
            // '?' is never valid
            ("ab?", false),
            ("?ab", false),
            // Digits are fine at either end
            ("ab0", true),
            ("0ab", true),
            // '_' may lead but not trail
            ("ab_", false),
            ("_ab", true),
            // '.' may lead but not trail
            ("ab.", false),
            (".ab", true),
            // ... and is of course fine in the middle
            ("a.b", true),
        ];
        for &(line, accepted) in cases.iter() {
            assert_eq!(
                accepted,
                handle_block_line(line, 4, &mut content, &file_info).is_ok(),
                "unexpected result for host {:?}",
                line
            );
        }

        // Length boundary: validate_host rejects hosts longer than 253 bytes,
        // so 253 is the longest accepted host and 254 is the shortest rejected one.
        let longest_ok = "a".repeat(253);
        assert!(handle_block_line(&longest_ok, 4, &mut content, &file_info).is_ok());
        let too_long = "a".repeat(254);
        assert!(handle_block_line(&too_long, 4, &mut content, &file_info).is_err());
    }

    /// A bare hostname in a block file produces a single entry with no destination.
    #[test]
    fn block_hostname() {
        let file_info = FileInfo {
            source_path: "testsrc".to_string(),
            local_path: "testlocal".to_string(),
        };
        let mut content = FilterContent::new();

        let outcome = handle_block_line("foo.com", 5, &mut content, &file_info);
        assert!(outcome.is_ok());

        assert_eq!(1, content.entries.len());

        let entry = content.get("foo.com").unwrap();
        assert_eq!(Some(5), entry.line_num);
        assert!(!entry.dest_upstream);
        assert_eq!(None, entry.dest_ipv4);
        assert_eq!(None, entry.dest_ipv6);

        // Only the exact host is stored: no subdomains or lookalikes.
        assert!(content.get("www.foo.com").is_none());
        assert!(content.get("foo2.com").is_none());
    }

    /// A bare hostname is not a valid override line.
    #[test]
    fn override_hostname() {
        let file_info = FileInfo {
            source_path: "testsrc".to_string(),
            local_path: "testlocal".to_string(),
        };
        let mut content = FilterContent::new();

        // override filters need IPs
        let outcome = handle_override_line("foo.com", 5, &mut content, &file_info);
        assert!(outcome.is_err());
    }

    /// Adblock-style block rules (`||host^`, `://host`, `.host^`, with optional
    /// `$modifiers`) are reduced to the bare host and recorded as plain blocks.
    #[test]
    fn block_adblock() {
        let mut content = FilterContent::new();
        let file_info = FileInfo {
            source_path: "testsrc".to_string(),
            local_path: "testlocal".to_string(),
        };

        // (input line, line number reported to the parser)
        let lines = [
            ("||foo.com^", 5),
            ("||bar.com.", 6),
            ("||baz.com", 7),
            ("://foo.net^", 8),
            ("://bar.net", 9),
            ("://*.baz.net^", 10),
            (".foo.org^", 11),
            ("||bar.org^$important", 12),
            ("||baz.org$important", 13),
        ];
        for &(line, line_num) in lines.iter() {
            assert!(
                handle_block_line(line, line_num, &mut content, &file_info).is_ok(),
                "line {} should be accepted: {:?}",
                line_num,
                line
            );
        }

        assert_eq!(9, content.entries.len());

        // (host, line number the entry should be attributed to)
        let expected = [
            ("foo.com", 5),
            ("bar.com", 6),
            ("baz.com", 7),
            ("foo.net", 8),
            ("bar.net", 9),
            ("baz.net", 10),
            ("foo.org", 11),
            ("bar.org", 12),
            ("baz.org", 13),
        ];
        for &(host, line_num) in expected.iter() {
            let entry = content
                .get(host)
                .unwrap_or_else(|| panic!("missing entry for {}", host));
            assert_eq!(Some(line_num), entry.line_num, "line_num for {}", host);
            assert!(!entry.dest_upstream, "dest_upstream for {}", host);
            assert_eq!(None, entry.dest_ipv4, "dest_ipv4 for {}", host);
            assert_eq!(None, entry.dest_ipv6, "dest_ipv6 for {}", host);
        }

        // Only exact hosts are stored: no subdomains or lookalikes.
        assert!(content.get("www.foo.com").is_none());
        assert!(content.get("foo2.com").is_none());
    }

    /// Adblock-style exception rules (`@@...`) inside a block file are recorded
    /// with `dest_upstream` set and no destination IPs.
    #[test]
    fn block_adblock_allow() {
        let mut content = FilterContent::new();
        let file_info = FileInfo {
            source_path: "testsrc".to_string(),
            local_path: "testlocal".to_string(),
        };

        // (input line, line number reported to the parser)
        let lines = [
            ("@@||foo.com^|", 5),
            ("@@||bar.com^", 6),
            ("@@|baz.com^", 7),
            ("@@|foo.net^", 8),
            ("@@-bar.net^", 9),
        ];
        for &(line, line_num) in lines.iter() {
            assert!(
                handle_block_line(line, line_num, &mut content, &file_info).is_ok(),
                "line {} should be accepted: {:?}",
                line_num,
                line
            );
        }

        assert_eq!(5, content.entries.len());

        // (host, line number the entry should be attributed to)
        let expected = [
            ("foo.com", 5),
            ("bar.com", 6),
            ("baz.com", 7),
            ("foo.net", 8),
            ("bar.net", 9),
        ];
        for &(host, line_num) in expected.iter() {
            let entry = content
                .get(host)
                .unwrap_or_else(|| panic!("missing entry for {}", host));
            assert_eq!(Some(line_num), entry.line_num, "line_num for {}", host);
            assert!(entry.dest_upstream, "dest_upstream for {}", host);
            assert_eq!(None, entry.dest_ipv4, "dest_ipv4 for {}", host);
            assert_eq!(None, entry.dest_ipv6, "dest_ipv6 for {}", host);
        }

        // Only exact hosts are stored: no subdomains or lookalikes.
        assert!(content.get("www.foo.com").is_none());
        assert!(content.get("foo2.com").is_none());
    }

    /// An adblock-style rule is not a valid override line.
    #[test]
    fn override_adblock() {
        let file_info = FileInfo {
            source_path: "testsrc".to_string(),
            local_path: "testlocal".to_string(),
        };
        let mut content = FilterContent::new();

        // override filters need IPs
        let outcome = handle_override_line("||foo.com^", 5, &mut content, &file_info);
        assert!(outcome.is_err());
    }

    /// `/etc/hosts`-style lines in a block file: every listed host is blocked and
    /// the IP given on the line is not kept as a destination.
    #[test]
    fn block_etchosts() {
        let mut content = FilterContent::new();
        let file_info = FileInfo {
            source_path: "testsrc".to_string(),
            local_path: "testlocal".to_string(),
        };

        // (input line, line number reported to the parser)
        let lines = [
            ("1.2.3.4 foo.com", 5),
            ("::2 foo.com", 6),
            ("::3 foo.biz", 7),
            ("1.2.3.5 foo.biz", 8),
            ("::4 foo.nz foo.co.nz foo.geek.nz", 9),
        ];
        for &(line, line_num) in lines.iter() {
            assert!(
                handle_block_line(line, line_num, &mut content, &file_info).is_ok(),
                "line {} should be accepted: {:?}",
                line_num,
                line
            );
        }

        assert_eq!(5, content.entries.len());

        // (host, expected line number). For hosts listed on two lines
        // (foo.com, foo.biz), just go with the last line num.
        let expected = [
            ("foo.com", 6),
            ("foo.biz", 8),
            ("foo.nz", 9),
            ("foo.co.nz", 9),
            ("foo.geek.nz", 9),
        ];
        for &(host, line_num) in expected.iter() {
            let entry = content
                .get(host)
                .unwrap_or_else(|| panic!("missing entry for {}", host));
            assert_eq!(Some(line_num), entry.line_num, "line_num for {}", host);
            assert!(!entry.dest_upstream, "dest_upstream for {}", host);
            assert_eq!(None, entry.dest_ipv4, "dest_ipv4 for {}", host);
            assert_eq!(None, entry.dest_ipv6, "dest_ipv6 for {}", host);
        }

        // Only exact hosts are stored: no subdomains or lookalikes.
        assert!(content.get("www.foo.com").is_none());
        assert!(content.get("foo2.com").is_none());
    }

    /// `/etc/hosts`-style lines in an override file: the IPv4 and IPv6 destinations
    /// given for a host on separate lines are both kept on that host's entry.
    #[test]
    fn override_etchosts() {
        let mut content = FilterContent::new();
        let file_info = FileInfo {
            source_path: "testsrc".to_string(),
            local_path: "testlocal".to_string(),
        };

        // (input line, line number reported to the parser)
        let lines = [
            ("1.2.3.4 foo.com", 5),
            ("::2 foo.com", 6),
            ("::3 foo.biz", 7),
            ("1.2.3.5 foo.biz", 8),
            ("::4 foo.nz foo.co.nz foo.geek.nz", 9),
        ];
        for &(line, line_num) in lines.iter() {
            assert!(
                handle_override_line(line, line_num, &mut content, &file_info).is_ok(),
                "line {} should be accepted: {:?}",
                line_num,
                line
            );
        }

        assert_eq!(5, content.entries.len());

        // (host, expected line number, expected IPv4 dest, expected IPv6 dest)
        let expected = [
            ("foo.com", 6, Some("1.2.3.4"), Some("::2")),
            ("foo.biz", 8, Some("1.2.3.5"), Some("::3")),
            ("foo.nz", 9, None, Some("::4")),
            ("foo.co.nz", 9, None, Some("::4")),
            ("foo.geek.nz", 9, None, Some("::4")),
        ];
        for &(host, line_num, ipv4, ipv6) in expected.iter() {
            let entry = content
                .get(host)
                .unwrap_or_else(|| panic!("missing entry for {}", host));
            assert_eq!(Some(line_num), entry.line_num, "line_num for {}", host);
            assert!(!entry.dest_upstream, "dest_upstream for {}", host);
            assert_eq!(
                ipv4.map(|ip| Ipv4Addr::from_str(ip).unwrap()),
                entry.dest_ipv4,
                "dest_ipv4 for {}",
                host
            );
            assert_eq!(
                ipv6.map(|ip| Ipv6Addr::from_str(ip).unwrap()),
                entry.dest_ipv6,
                "dest_ipv6 for {}",
                host
            );
        }

        // Only exact hosts are stored: no subdomains or lookalikes.
        assert!(content.get("www.foo.com").is_none());
        assert!(content.get("foo2.com").is_none());
    }
}

M src/lookup.rs => src/lookup.rs +9 -3
@@ 92,16 92,22 @@ impl Lookup {
                filter_result =
                    filter_locked
                        .check(&request_info.name)
                        .map(|(file_info, file_entry)| {
                        .map(|(file_info, filter_entry)| {
                            if let Some(f) = file_info {
                                (f.source_path.clone(), (*file_entry).clone())
                                (f.source_path.clone(), (*filter_entry).clone())
                            } else {
                                (HARDCODED_SOURCE_NAME.clone(), (*file_entry).clone())
                                (HARDCODED_SOURCE_NAME.clone(), (*filter_entry).clone())
                            }
                        });
            }
        }
        if let Some((file_source_path, entry)) = filter_result {
            // If the filter rule says to allow the (sub)domain, then act as if no filter result was found
            // (e.g. allow 'good.foo.com' while blocking 'foo.com')
            if entry.dest_upstream {
                return Ok(false);
            }

            // Filter had a match (block or override), write filtered response to packet_buffer.
            packet_buffer.clear();
            write_filter_response(

M src/runner.rs => src/runner.rs +24 -65
@@ 1,5 1,4 @@
use std::collections::HashMap;
use std::convert::TryFrom;
use std::fs;
use std::net::{SocketAddr, ToSocketAddrs};
use std::path::PathBuf;


@@ 11,7 10,7 @@ use anyhow::{bail, Context, Result};
use async_lock::Barrier;
use async_net::{TcpListener, TcpStream, UdpSocket};
use bytes::BytesMut;
use hyper::{Client, Uri};
use hyper::Client;
use smol::Task;
use tracing::{self, debug, info, trace, warn};



@@ 21,6 20,9 @@ use crate::{cache, client, config, hyper_smol, listen_tcp, listen_udp, lookup};
/// TCP size header is 16 bits, so max theoretical size is 64k
static MAX_TCP_BYTES: u16 = 65535;

/// Amount of time in milliseconds to wait for a download (10000 ms = 10 seconds).
/// Passed as the last argument to `filter::update_if_url` when refreshing filters.
static DOWNLOAD_TIMEOUT_MS: u64 = 10000;

/// Hardcoded hostnames that we should block for technical reasons
static HARDCODED_BLOCKED_HOSTS: &'static [&'static str] = &[
    // Placeholder domain for users to check that Originz is working


@@ 363,86 365,43 @@ async fn refresh_filters(
    force_update: bool,
) {
    for (_name, conf) in filter_configs {
        let mut filters = Vec::new();
        // For each override/block, check the URL (or do nothing for a local path)
        // Then re-read the result from disk.
        let mut override_filters = Vec::new();
        for entry in &conf.overrides {
            if let Some(filter) = refresh_filter(
                fetch_client,
                filters_dir,
                entry,
                reader::FilterType::OVERRIDE,
                force_update,
            )
            .await
            if let Ok((file_info, downloaded)) =
                filter::update_if_url(fetch_client, filters_dir, entry, DOWNLOAD_TIMEOUT_MS).await
            {
                filters.push(filter);
                if downloaded || force_update {
                    if let Ok(filter) = reader::read_override(file_info) {
                        override_filters.push(filter);
                    }
                }
            }
        }

        let mut block_filters = Vec::new();
        for entry in &conf.blocks {
            if let Some(filter) = refresh_filter(
                fetch_client,
                filters_dir,
                entry,
                reader::FilterType::BLOCK,
                force_update,
            )
            .await
            if let Ok((file_info, downloaded)) =
                filter::update_if_url(fetch_client, filters_dir, entry, DOWNLOAD_TIMEOUT_MS).await
            {
                filters.push(filter);
                if downloaded || force_update {
                    if let Ok(filter) = reader::read_block(file_info) {
                        block_filters.push(filter);
                    }
                }
            }
        }

        if let Ok(mut filter_locked) = filter.lock() {
            filter_locked.update_entries(filters);
            filter_locked.update_override_entries(override_filters);
            filter_locked.update_block_entries(block_filters);
        } else {
            warn!("Failed to lock filter for entry update");
        }
    }
}

/// Loads one filter of the given type from `filter_path_or_url`.
///
/// If the string parses as a URI with a scheme it is treated as a URL:
/// `filter::update_url` is asked to refresh the copy under `fetch_dir`, and the
/// file is re-read only when that reported an update or `force_update` is set.
/// Anything else is treated as a local filesystem path and read directly.
///
/// Returns `None` when the download fails, when a URL had no update and no read
/// was forced, or when reading the file fails.
async fn refresh_filter(
    fetch_client: &Client<hyper_smol::SmolConnector>,
    fetch_dir: &PathBuf,
    filter_path_or_url: &String,
    filter_type: reader::FilterType,
    force_update: bool,
) -> Option<reader::FilterEntries> {
    // Filesystem paths can get parsed as URLs with no scheme, so only a URI
    // carrying an explicit scheme counts as something to download.
    let looks_like_url = match Uri::try_from(filter_path_or_url) {
        Ok(uri) => uri.scheme().is_some(),
        Err(_) => false,
    };

    let local_path = if looks_like_url {
        // Check for an updated version to download.
        match filter::update_url(fetch_client, fetch_dir, filter_path_or_url, 10000).await {
            // The file had an update to download, or an update/read is being forced
            Ok((downloaded_path, updated)) if updated || force_update => downloaded_path,
            // Download failed, or nothing changed and no read was forced
            _ => return None,
        }
    } else {
        filter_path_or_url.clone()
    };

    reader::read(
        filter_type,
        reader::FileInfo {
            source_path: filter_path_or_url.clone(),
            local_path,
        },
    )
    .ok()
}

async fn handle_next_request(
    server_query_rx: &Mutex<async_channel::Receiver<RequestMsg>>,
    lookup: &mut lookup::Lookup,