~nickbp/kapiti

kapiti/src/config.rs -rw-r--r-- 9.1 KiB
909a89f0Nick Parker Fix build badge 22 days ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
use std::collections::HashMap;
use std::env;
use std::fmt;
use std::fs::File;
use std::io::prelude::*;
use std::marker::PhantomData;
use std::vec::Vec;

use anyhow::{Context, Result};
use serde::de;
use serde::{Deserialize, Deserializer};
use tracing::{trace, warn};

/// One filter section of a Kapiti config, as deserialized from TOML.
/// Unknown keys inside the section are rejected.
#[derive(Debug, Clone, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct ConfigFilter {
    /// File paths and/or URLs of override files (zero or more). URLs are refreshed periodically.
    /// An override file resembles `/etc/hosts`: newline-separated hostnames paired with IP destinations.
    /// May be written as `override` or `overrides`, and as a single string or a list of strings.
    #[serde(default, alias = "override", deserialize_with = "string_or_seq_string")]
    pub overrides: Vec<String>,

    /// File paths and/or URLs of block files (zero or more). URLs are refreshed periodically.
    /// A block file is a newline-separated list of hostnames to block, subdomains included.
    /// Must not be combined with `allows` within the same filter object.
    /// May be written as `block` or `blocks`, and as a single string or a list of strings.
    #[serde(default, alias = "block", deserialize_with = "string_or_seq_string")]
    pub blocks: Vec<String>,

    /// File paths and/or URLs of allow files (zero or more). URLs are refreshed periodically.
    /// An allow file is a newline-separated list of hostnames to allow, subdomains included.
    /// Must not be combined with `blocks` within the same filter object.
    /// May be written as `allow` or `allows`, and as a single string or a list of strings.
    #[serde(default, alias = "allow", deserialize_with = "string_or_seq_string")]
    pub allows: Vec<String>,

    /// Which hosts this filter block covers. Empty/unset means all requests, unless some other
    /// filter has a closer `applies_to`.
    /// Accepts a single IP (`192.16.1.2`) or a CIDR range (`192.16.1.0/24`).
    /// Two filter blocks may not share the same `applies_to` value; for any given host, the
    /// block with the "closest" match is the one evaluated.
    /// May be written as `apply_to` or `applies_to`.
    #[serde(default, alias = "apply_to")]
    pub applies_to: String,
}

/// In-memory form of a Kapiti TOML config file.
/// Unknown top-level keys are rejected.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Config {
    /// Directory for temporary files such as downloaded override/block files.
    /// Defaults to `/tmp/kapiti`.
    #[serde(default = "default_storage")]
    pub storage: String,

    /// User to "downgrade" to when the service is started as root.
    /// Defaults to `nobody`; set to an empty string to disable.
    #[serde(default = "default_user")]
    pub user: String,

    /// How many worker threads process queries. More workers allow more simultaneous queries
    /// in exchange for higher resource consumption.
    #[serde(default = "default_workers")]
    pub workers: u64,

    /// IP+Port endpoint on which the DNS server listens.
    /// Defaults to `0.0.0.0:53`.
    #[serde(default = "default_listen_dns")]
    pub listen_dns: String,

    /// Interval, in seconds, between checks for changes to local and remote filters.
    #[serde(default = "default_filter_refresh_seconds")]
    pub filter_refresh_seconds: u64,

    /// Filters to apply against requests, keyed by name.
    /// Each filter block may optionally name the client IPs it applies to.
    /// May be written as `filter` or `filters`.
    #[serde(default, alias = "filter")]
    pub filters: HashMap<String, ConfigFilter>,

    /// Same as `ConfigFilter.overrides` with an empty `ConfigFilter.applies_to`.
    #[serde(default, alias = "override", deserialize_with = "string_or_seq_string")]
    pub overrides: Vec<String>,

    /// Same as `ConfigFilter.blocks` with an empty `ConfigFilter.applies_to`.
    #[serde(default, alias = "block", deserialize_with = "string_or_seq_string")]
    pub blocks: Vec<String>,

    /// Same as `ConfigFilter.allows` with an empty `ConfigFilter.applies_to`.
    #[serde(default, alias = "allow", deserialize_with = "string_or_seq_string")]
    pub allows: Vec<String>,

    /// One or more upstream DNS server endpoints, listed in priority order (earlier entries are
    /// tried before later ones).
    /// Hostnames are allowed (and required for DNS-over-HTTPS), but at least one entry must be
    /// given as an IP so that hostname entries can be "bootstrapped".
    /// Supported forms:
    /// - `127.0.0.1[:53]` for a "classic" port 53 UDP+TCP endpoint, where UDP takes priority
    /// - `udp://127.0.0.1[:53]` for a "classic" port 53 endpoint that is UDP-only
    /// - `tcp://127.0.0.1[:53]` for a "classic" port 53 endpoint that is TCP-only
    /// - `https://example.com[:443]/dns-query` or `https://127.0.0.1[:443]/dns-query` for a DNS-over-HTTPS endpoint, see https://github.com/curl/curl/wiki/DNS-over-HTTPS#publicly-available-servers
    /// - `tls://127.0.0.1[:853]` for a DNS-over-TLS endpoint, see https://dnsprivacy.org/public_resolvers/#dns-over-tls-dot
    ///
    /// May be written as `upstream` or `upstreams`, and as a single string or a list of strings.
    #[serde(default, alias = "upstream", deserialize_with = "string_or_seq_string")]
    pub upstreams: Vec<String>,

    /// URL of a Redis cache, e.g. `redis://127.0.0.1:6379/0` or
    /// `redis://user:password@redis.local:6379/0`.
    /// Leave empty/unset to use an internal in-memory cache instead of Redis.
    #[serde(default)]
    pub redis: String,

    /// Maximum number of records in the internal in-memory cache; 0 means unlimited.
    /// Expired records are pruned automatically, so the limit should only be hit under heavier
    /// workloads. Ignored when `redis` is provided.
    #[serde(default = "default_cache_size")]
    pub cache_size: usize,
}

impl Config {
    /// Builds a `Config` intended for benchmark tests.
    ///
    /// `storage` and the single `upstream` come from the caller. The DNS listener is bound to
    /// `127.0.0.1:0` so that an ephemeral port is used, and everything else is either empty or
    /// taken from the regular defaults.
    pub fn new_for_test(storage: &str, upstream: String) -> Config {
        Self {
            storage: storage.to_owned(),
            // An empty user turns off the root "downgrade": the default 'nobody' user might not
            // exist in whatever environment the tests run in.
            user: String::new(),
            workers: default_workers(),
            listen_dns: String::from("127.0.0.1:0"),
            filter_refresh_seconds: default_filter_refresh_seconds(),
            filters: HashMap::new(),
            overrides: vec![],
            blocks: vec![],
            allows: vec![],
            upstreams: vec![upstream],
            redis: String::new(),
            cache_size: default_cache_size(),
        }
    }
}

/// Serde default for `Config.storage`: the `/tmp/kapiti` directory.
fn default_storage() -> String {
    String::from("/tmp/kapiti")
}

/// Serde default for `Config.user`: the `nobody` user.
fn default_user() -> String {
    String::from("nobody")
}

/// Serde default for `Config.workers`: ten worker threads.
fn default_workers() -> u64 {
    const WORKERS: u64 = 10;
    WORKERS
}

/// Serde default for `Config.listen_dns`: port 53 on all IPv4 interfaces.
fn default_listen_dns() -> String {
    String::from("0.0.0.0:53")
}

/// Serde default for `Config.filter_refresh_seconds`: three hours, expressed in seconds.
fn default_filter_refresh_seconds() -> u64 {
    const HOURS: u64 = 3;
    const SECONDS_PER_HOUR: u64 = 60 * 60;
    HOURS * SECONDS_PER_HOUR
}

/// Serde default for `Config.cache_size`: 100k records, which feels like it should be plenty.
fn default_cache_size() -> usize {
    100_000
}

/// Serde `deserialize_with` helper for config values that may be written in the TOML file either
/// as one string or as a list of strings.
/// Both spellings end up as a `Vec<String>`; a lone string becomes a one-element vector.
fn string_or_seq_string<'de, D>(deserializer: D) -> Result<Vec<String>, D::Error>
where
    D: Deserializer<'de>,
{
    /// Visitor that accepts either a string or a sequence and yields a `Vec<String>`.
    struct OneOrMany(PhantomData<Vec<String>>);

    impl<'de> de::Visitor<'de> for OneOrMany {
        type Value = Vec<String>;

        fn expecting(&self, out: &mut fmt::Formatter) -> fmt::Result {
            out.write_str("string or list of strings")
        }

        // Single string: wrap it in a one-element list.
        fn visit_str<E>(self, single: &str) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            let entry = String::from(single);
            Ok(vec![entry])
        }

        // Sequence: hand the elements to the regular `Vec<String>` deserialization.
        fn visit_seq<S>(self, seq: S) -> Result<Self::Value, S::Error>
        where
            S: de::SeqAccess<'de>,
        {
            let elements = de::value::SeqAccessDeserializer::new(seq);
            Vec::<String>::deserialize(elements)
        }
    }

    let visitor = OneOrMany(PhantomData);
    deserializer.deserialize_any(visitor)
}

pub fn parse_config_file(config_path: &String) -> Result<Config> {
    let mut config = String::new();
    File::open(&config_path)?.read_to_string(&mut config)?;

    // For each "CONFIG_FOO" envvar, replace any instances of "{{FOO}}" with the envvar value
    for (oskey, osvalue) in env::vars_os() {
        if let Some(key) = oskey.to_str() {
            if !key.starts_with("CONFIG_") {
                continue;
            }
            if let Some(value) = osvalue.to_str() {
                let replaceme = format!("{{{{{}}}}}", key.to_string()[7..].to_string());
                config = config.replace(replaceme.as_str(), value);
            } else {
                warn!(
                    "Envvar {} value is not valid UTF8, templating disabled",
                    key
                );
            }
        }
    }

    trace!("Rendered config:\n{}", config);

    toml::from_str(config.as_str()).with_context(|| "Failed to parse TOML config")
}