~moviuro/moviuro.bin

moviuro.bin/lie-to-me -rwxr-xr-x 7.3 KiB
3b0cfebb — Moviuro blackhole: add -a for allowlist 20 days ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
#!/bin/sh

# Stop at the first command that fails outside of a condition.
set -e

# Built-in defaults; the option parser below overrides them for -t, -f and -u.
_target='127.0.0.1'
_format='unbound'
_unresolve='use-application-dns.net'

# Print the help text on stdout.
# Takes no arguments. The here-document is unquoted, so $0 and the current
# values of $_format, $_target and $_unresolve are expanded into the text; the
# "Default:" lines therefore show whatever those variables hold at call time.
# Callers that report a usage error redirect this output to stderr themselves.
__usage () {
  cat << EOF
$0 [-d] [-f format] [-o out] [-t target] [-u "domain [domain [...]]"]
$0 -h

Create a lying DNS file for the specified format.
Lies include: tracking, malware-related, advertizing sites and also sites that
must not resolve, see below.
Lies never include: TLDs or public suffixes, (see https://publicsuffix.org/)
  o -d: enable debug (set -x)
  o -f format: specify an output format.
    Supported formats are:
      - unbound
      - bind
      - hosts (see hosts(5))
      - none  (one hostname per line)
    Default: $_format
  o -o out: specify an output file.
    Default: <stdout>
  o -t target: specify the target
    Default: $_target
  o -u "domain [domain [...]]": domains to break resolution for (NXDOMAIN)
    Default: $_unresolve

MAKE SURE YOU ARE ALLOWED TO USE THE LISTS IN YOUR SITUATION

There are also a few domains that we MUST take care of:
  o localhost: this one is the most bizarre. Some badly written applications
    WILL query our lying DNS resolver for 'localhost' and ignore
    nsswitch.conf(5) (like, e.g. redis)
  o use-application-dns.net: this one is a canary domain for DoH. It is used
    by badly written software (Firefox) to check whether it should use DoH. This
    defeats the ENTIRE FUCKING PURPOSE of DoH ANYWAY because my ISP could very
    well intercept and modify that DNS request (made by the usual means) to
    return NXDOMAIN, thus degrading Firefox to classic DNS queries. WHICH IS THE
    EXACT SITUATION DoH WAS SUPPOSED TO PROTECT USERS FROM.
  o We explicitly reject TLDs. If one of the input files contains a TLD, we will
    ignore it: we intend to block bad domains, not the web in its entirety.

SEE ALSO

  https://support.mozilla.org/en-US/kb/canary-domain-use-application-dnsnet
  https://news.ycombinator.com/item?id=22414912
  https://publicsuffix.org/
EOF
}

# Command-line parsing.
#
# _output_file is reset explicitly: it has no default elsewhere, so without
# this a value inherited from the caller's environment would silently redirect
# (and overwrite) output even though -o was never given.
_output_file=""

# The leading ':' in the option string selects getopts' silent mode: a missing
# option argument is reported as ':' and an unknown option as '?', with the
# offending option letter in $OPTARG in both cases.
while getopts ":df:ho:t:u:" _opt; do
  case "$_opt" in
    d) set -x                 ;;
    f) _format="$OPTARG"      ;;
    h) __usage; exit 0        ;;
    o) _output_file="$OPTARG" ;;
    t) _target="$OPTARG"      ;;
    u) _unresolve="$OPTARG"   ;;
    :) printf '%s: option -%s requires an argument\n' "$0" "$OPTARG" >&2
       __usage >&2 ; exit 1   ;;
    *) printf '%s: unknown option -%s\n' "$0" "$OPTARG" >&2
       __usage >&2 ; exit 1   ;;
  esac
done

# Drop the options that were consumed; any operands are left in "$@".
shift "$((OPTIND-1))"

# Shell-function replacement for true(1): takes no action and reports success.
# NOTE(review): presumably defined so that `|| true` never depends on an
# external binary - confirm before removing.
true() {
  return 0
}

# Select a downloader once, at startup, and expose it as:
#   __download URL    - write the body of URL to stdout
# Exits with status 4 when no supported tool is installed.

# first choice is curl(1), should be OK for Linux
if command -v curl >/dev/null 2>&1; then
  __download () {
    # -f: on an HTTP error (404, 500, ...) print nothing and exit non-zero
    #     instead of emitting the server's error page, which would otherwise
    #     be parsed as if it were a list of hosts.
    # -sS: no progress meter, but still report failures on stderr.
    # -L: follow redirects.
    curl --compressed -fsSL "$1"
  }
# second is fetch(1), for FreeBSD
elif command -v fetch >/dev/null 2>&1; then
  __download () {
    fetch -q -o - "$1"
  }
# last comes ftp(1), which does far more than its name implies (OpenBSD only)
elif [ "$(uname -s)" = OpenBSD ] && command -v ftp >/dev/null 2>&1; then
  __download () {
    ftp -o - "$1"
  }
else
  printf '%s\n' "Oh no! You don't seem to have curl(1), fetch(1) or (on OpenBSD) ftp(1)" >&2
  printf '%s\n' "I'm cowardly exiting" >&2
  exit 4
fi

# Build the anchored extended regular expression that matches exactly the
# space-separated domains given in $1, and print it on stdout.
# Dots are escaped so that "a.b" matches only the literal name "a.b" and not,
# e.g., "axb"; spaces become '|' alternation.
# Example: __unresolve_to_regex "a.example b.example"
#          -> ^(a\.example|b\.example)$
__unresolve_to_regex () {
  printf '^(%s)$' "$(printf '%s' "$1" | sed 's/\./\\./g' | tr ' ' '|')"
}

# If we're here, we're actually going to do something
# Everything intermediate lives in one private scratch directory:
#   $_temp          - accumulated host names (rewritten in place later on)
#   $_temp_ack      - acknowledgement lines, one per list that was used
#   $_public_suffix - sorted public suffixes / TLDs that must never be blocked
_temp_dir="$(mktemp -d -t "$(basename "$0").XXXXXX")"
_temp="$_temp_dir/hosts"
_temp_ack="$_temp_dir/acks"
_unresolve_regex="$(__unresolve_to_regex "$_unresolve")"
_public_suffix="$_temp_dir/tlds"

# Remove the scratch directory, if any.
# Idempotent: calling it again after the directory is gone (or before it was
# ever created) is a successful no-op, so it is safe both as an explicit call
# and as an EXIT handler.
__cleanup () {
  if [ -n "$_temp_dir" ] && [ -d "$_temp_dir" ]; then
    rm -rf -- "$_temp_dir"
  fi
}

# Clean up on every way out, including an abort caused by `set -e`.
trap __cleanup EXIT
# A signal must actually stop the script: exit with the conventional
# 128+signal status, which in turn runs the EXIT handler above.
trap 'exit 130' INT
trap 'exit 143' TERM

# Add the host names of a hosts(5)-formatted list to $_temp.
#   $1: URL of the list
#   $2: extended regular expression matching the address the list maps its
#       entries to (e.g. '0\.0\.0\.0'); only lines starting with it are used
# For each matching line the second field (the host name) is appended to
# $_temp, and one acknowledgement line is appended to $_temp_ack.
__add_hosts () {
  printf '# Original file at %s\n' "$1" >> "$_temp_ack"
  # The pattern is handed to awk through the environment rather than spliced
  # into the program text, so it cannot break or alter the awk script.
  # Address and name may be separated by a space or a tab, as in hosts(5).
  __download "$1" |
    _lie_addr_re="$2" awk '
      BEGIN { re = "^" ENVIRON["_lie_addr_re"] "[ \t]" }
      $0 ~ re { print $2 }
    ' >> "$_temp"
}

# Add a plain list (one host name per line) to $_temp.
#   $1: URL of the list
# An acknowledgement line is appended to $_temp_ack first. Lines that are
# empty or that begin with '#' (or a backslash) are not copied.
__add_simple () {
  printf '# Original file at %s\n' "$1" >> "$_temp_ack"
  # grep(1) reports failure when it selects nothing; an empty list is fine,
  # so that outcome is turned into success.
  __download "$1" |
    grep -e '^[^\#]' >> "$_temp" ||
    return 0
}

# Download the public suffix list and store its entries, sorted, in
# $_public_suffix; an acknowledgement line is appended to $_temp_ack.
# The list has its own syntax (https://publicsuffix.org/list/):
#   - the first grep drops "//" comments, empty lines and "!" exception rules
#   - the second grep keeps each remaining line from its first character that
#     is not one of ( ^ \ * . ) onwards, which strips a leading "*."
__create_public_suffix () {
  printf '# The public suffix list was used https://publicsuffix.org\n' >> "$_temp_ack"
  {
    __download "https://publicsuffix.org/list/public_suffix_list.dat" |
      grep -vE '^(//|$|!)' |
      grep -Eo '[^(^\*\.)].+' |
      sort
  } > "$_public_suffix"
}

# Normalise the collected host names in $_temp, in place.
# Steps, in order:
#   1. strip carriage returns
#   2. fold everything to lower case
#   3. drop, in a single pass, every line that
#        - contains a space or is empty,
#        - is exactly "localhost" (many poorly configured programs ask the DNS
#          for it instead of honouring nsswitch.conf(5), and lying about it
#          breaks them badly),
#        - matches $_unresolve_regex (those domains are emitted separately)
#   4. sort and de-duplicate into "$_temp.2"
#   5. remove every entry that is also listed in $_public_suffix (a TLD or a
#      public suffix); comm(1) does this far faster than grep(1) on sorted
#      input
__to_unix () {
  tr -d '\r' < "$_temp" |
    tr '[:upper:]' '[:lower:]' |
    grep -vE -e '( |^$)' -e '^localhost$' -e "$_unresolve_regex" |
    sort -u > "${_temp}.2"
  comm -23 "${_temp}.2" "$_public_suffix" > "$_temp"
}

# Copy stdin to the final destination.
# With no output file configured ($_output_file empty) the data goes to
# stdout. Otherwise it is written to $_output_file; a file that already exists
# there is first copied to "$_output_file.bak" as a safety net.
__to_output () {
  case "$_output_file" in
    "")
      cat -
      ;;
    *)
      if [ -e "$_output_file" ]; then
        # Keep the previous version around in case this run goes wrong.
        cp "$_output_file" "$_output_file".bak
      fi
      cat - > "$_output_file"
      ;;
  esac
}

# Default header hook: prints nothing and succeeds. The format selection
# further down replaces it for formats that need a preamble.
__headers () {
  return 0
}

# Default "must not resolve" hook: ignores its arguments, prints nothing and
# succeeds. The format selection further down replaces it where the output
# format can express such entries.
__unresolve () {
  return 0
}

# Set $_rrtype to the DNS record type that fits the address in $1: "AAAA" when
# it contains a ':' (an IPv6 address), "A" otherwise.
# The result is returned in a variable rather than on stdout so that the
# per-host formatting loop does not have to fork a subshell for every line.
__set_rrtype () {
  case "$1" in
    *:*) _rrtype="AAAA" ;;
    *)   _rrtype="A"    ;;
  esac
}

# We define the __to_format function that properly prints the host redirection
# in the specified form.
# Usage:   __to_format "host"        "target"
# Example: __to_format "example.com" "10.10.10.10"
#
# Depending on the format, __headers (preamble) and __unresolve (domains that
# must not resolve at all, passed as separate arguments) are redefined too.
# An unsupported format prints the usage on stderr and exits with status 1.
case "$_format" in
  "unbound")
    __unresolve () {
      for _d in "$@"; do
        printf 'local-zone: "%s" static\n' "$_d"
      done
    }
    __to_format () {
      __set_rrtype "$2"
      printf 'local-zone: "%s" redirect\n' "$1"
      printf 'local-data: "%s %s %s"\n' "$1" "$_rrtype" "$2"
    }
  ;;
  "none")
    __to_format () {
      printf '%s\n' "$1"
    }
  ;;
  "bind")
    echo "I do NOT include the headers in $_output_file" >&2
    __to_format () {
      __set_rrtype "$2"
      printf '%s. IN %s %s\n' "$1" "$_rrtype" "$2"
    }
    __unresolve () {
      echo "I need help with this item on bind; please reach out." >&2
    }
  ;;
  "hosts")
    __to_format () {
      # hosts(5) order: address first, then the name
      printf '%s  %s\n' "$2" "$1"
    }
    __headers () {
      printf '%s\n' "# /etc/hosts: static lookup table for host names"
      printf '\n'
      printf '%s  %s  %s\n' "127.0.0.1" "localhost.localdomain" "localhost"
      printf '%s  %s  %s\n' "::1" "localhost.localdomain" "localhost"
      printf '\n'
    }
  ;;
  *)
    __usage >&2 ; exit 1
  ;;
esac

# Fetch the public suffixes first, then every block list, then normalise.
# The order of the calls is the order of the acknowledgement lines in the
# final output.
__create_public_suffix

# Plain lists: one host name per line.
for _list_url in \
  "https://s3.amazonaws.com/lists.disconnect.me/simple_ad.txt" \
  "https://s3.amazonaws.com/lists.disconnect.me/simple_tracking.txt" \
  "https://s3.amazonaws.com/lists.disconnect.me/simple_malware.txt" \
  "https://s3.amazonaws.com/lists.disconnect.me/simple_malvertising.txt" \
  "https://pgl.yoyo.org/adservers/serverlist.php?mimetype=plaintext&hostformat=plain" \
  "https://raw.githubusercontent.com/ookangzheng/dbl-oisd-nl/master/dbl2.txt"
do
  __add_simple "$_list_url"
done

# hosts(5)-style lists; the second argument is the address each list points
# its entries at, written as a regular expression.
__add_hosts \
  "https://someonewhocares.org/hosts/zero/hosts" \
  '0\.0\.0\.0'
__add_hosts \
  "http://winhelp2002.mvps.org/hosts.txt" \
  '0\.0\.0\.0'
__add_hosts \
  "https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts" \
  '0\.0\.0\.0'
__add_hosts \
  "https://www.malwaredomainlist.com/hostslist/hosts.txt" \
  '127\.0\.0\.1'
__add_hosts \
  "https://www.github.developerdan.com/hosts/lists/ads-and-tracking-extended.txt" \
  '0\.0\.0\.0'

# Clean, de-duplicate and filter what was collected.
__to_unix

# Assemble the final document and hand it to __to_output.
# The group's stdin is the cleaned host list, which only the read loop
# consumes.
{
  # We print the acks first
  cat "$_temp_ack"
  # We print the headers too, if we know how to do it
  __headers
  while IFS= read -r _host; do
    __to_format "$_host" "$_target"
  done
  # Last, we add hosts that shouldn't resolve at all.
  # $_unresolve is deliberately left unquoted so that it splits into one
  # argument per domain; pathname expansion is switched off around it so that
  # a domain containing '*', '?' or '[' is never replaced by file names from
  # the current directory.
  set -f
  # shellcheck disable=SC2086
  __unresolve $_unresolve
  set +f
} < "$_temp" | __to_output

__cleanup