~rumpelsepp/openbl

openbl/fetch.go -rw-r--r-- 4.7 KiB
69f81896 — Stefan Tatschner Revert "https seems to be broken in openbl.org" 3 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
package main

import (
	"errors"
	"io/ioutil"
	"log"
	"net"
	"net/http"
	"regexp"
	"strings"
)

// defaultBlacklists lists the blacklist URLs that are used by default.
// NOTE(review): nothing in the visible part of this file references the
// slice; presumably each entry is handed to fetchBlacklist by the caller --
// confirm against the call site.
var defaultBlacklists = []string{
	"https://www.openbl.org/lists/base_all.txt",
	"https://www.spamhaus.org/drop/edrop.txt",
}

// Regular expressions adapted from:
// http://ruilapa.net/2016/08/04/golang-ipv4-ipv6-regexp/
//
// The patterns are assembled from small named fragments so they stay
// readable, and they only use non capturing groups. They are unanchored on
// purpose: callers use FindString to pull an address out of a longer line.
// For the same reason they do not match surrounding whitespace; a match that
// included whitespace would be rejected by net.ParseIP / net.ParseCIDR.
const (
	// ipv4Octet matches a decimal octet (0-255) without leading zeros.
	ipv4Octet = `(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])`

	// ipv4Pattern matches a dotted quad IPv4 address.
	ipv4Pattern = `(?:` + ipv4Octet + `\.){3}` + ipv4Octet

	// ipv4PrefixLen matches an IPv4 prefix length (0-32).
	ipv4PrefixLen = `(?:3[0-2]|[1-2][0-9]|[0-9])`

	// ipv6Hextet matches one group of 1 to 4 hex digits.
	ipv6Hextet = `[0-9A-Fa-f]{1,4}`

	// ipv6Group matches a hextet followed by a colon ("h:").
	ipv6Group = `(?:` + ipv6Hextet + `:)`

	// ipv6Tail matches a colon followed by a hextet (":h").
	ipv6Tail = `(?::` + ipv6Hextet + `)`

	// ipv6PrefixLen matches an IPv6 prefix length (0-128).
	ipv6PrefixLen = `(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9])`

	// ipv6Pattern matches an IPv6 address, optionally with an embedded IPv4
	// tail and an optional "%zone" suffix. There is one alternative per
	// number of leading "h:" groups. Inside each alternative the embedded
	// IPv4 form is tried first: Go's regexp prefers the first alternative
	// that matches, so trying the plain hextet form first would cut
	// "::ffff:1.2.3.4" down to "::ffff:1".
	ipv6Pattern = `(?:` +
		ipv6Group + `{7}(?:` + ipv6Hextet + `|:)` +
		`|` + ipv6Group + `{6}(?:` + ipv6Tail + `|` + ipv4Pattern + `|:)` +
		`|` + ipv6Group + `{5}(?::` + ipv4Pattern + `|` + ipv6Tail + `{1,2}|:)` +
		`|` + ipv6Group + `{4}(?:` + ipv6Tail + `?:` + ipv4Pattern + `|` + ipv6Tail + `{1,3}|:)` +
		`|` + ipv6Group + `{3}(?:` + ipv6Tail + `{0,2}:` + ipv4Pattern + `|` + ipv6Tail + `{1,4}|:)` +
		`|` + ipv6Group + `{2}(?:` + ipv6Tail + `{0,3}:` + ipv4Pattern + `|` + ipv6Tail + `{1,5}|:)` +
		`|` + ipv6Group + `{1}(?:` + ipv6Tail + `{0,4}:` + ipv4Pattern + `|` + ipv6Tail + `{1,6}|:)` +
		`|:(?:` + ipv6Tail + `{0,5}:` + ipv4Pattern + `|` + ipv6Tail + `{1,7}|:)` +
		`)(?:%.+)?`
)

// Compiled once at package scope; MustCompile panics at start-up if one of
// the patterns above is malformed.
var (
	ipv4RE     = regexp.MustCompile(ipv4Pattern)
	ipv4CIDRRE = regexp.MustCompile(ipv4Pattern + `\/` + ipv4PrefixLen)
	ipv6RE     = regexp.MustCompile(ipv6Pattern)
	ipv6CIDRRE = regexp.MustCompile(ipv6Pattern + `\/` + ipv6PrefixLen)
)

// extractIPsAndNetworks searches rawStr for an IP address or a CIDR network
// and returns it in the string form produced by the net package.
//
// The regexes are tried in a fixed order (IPv4 network, IPv4 address, IPv6
// network, IPv6 address) and the first non-empty match is used. The match is
// then parsed with the stdlib: the regexes filter the untrusted input, the
// parser validates that the result actually makes sense. For a network only
// the network part is returned; the host address is dropped.
//
// An error is returned when nothing matches or when the match does not parse.
//
// TODO: Error handling?
func extractIPsAndNetworks(rawStr string) (string, error) {
	// Ordered by priority; the first finder that yields something wins.
	finders := []func(string) string{
		ipv4CIDRRE.FindString,
		ipv4RE.FindString,
		ipv6CIDRRE.FindString,
		ipv6RE.FindString,
	}

	var candidate string
	for _, find := range finders {
		if candidate = find(rawStr); candidate != "" {
			break
		}
	}
	if candidate == "" {
		return "", errors.New("Not a valid ip address.")
	}

	// Second layer of sanity checking on top of the regexes. A plain
	// address is tried first, then a network.
	if ip := net.ParseIP(candidate); ip != nil {
		return ip.String(), nil
	}

	_, ipNet, err := net.ParseCIDR(candidate)
	if err != nil {
		return "", errors.New("Not a valid ip address.")
	}
	return ipNet.String(), nil
}

// fetchBlacklist downloads the blacklist at link, runs every line of the
// response through extractIPsAndNetworks and sends each accepted address or
// network on ipAddresses.
//
// Exactly one value is sent on done when the worker finishes, whether it
// succeeded or gave up early. Failures are logged, not returned. Lines that
// do not yield an address are skipped; the reason is logged only when the
// package-level debug flag is set.
//
// NOTE(review): http.Get uses http.DefaultClient, which has no timeout, so a
// stalled server blocks this worker indefinitely.
func fetchBlacklist(link string, ipAddresses chan string, done chan bool) {
	// Registered first so it runs last: the response body is closed before
	// completion is signalled, and no exit path can forget the signal.
	defer func() { done <- true }()

	resp, err := http.Get(link)
	if err != nil {
		log.Printf("HTTP GET failed; Terminating worker. (%s: %v)", link, err)
		return
	}
	defer resp.Body.Close()

	// An error page must not be scanned for addresses: whatever happens to
	// look like an IP in it would end up in the blacklist.
	if resp.StatusCode != http.StatusOK {
		log.Printf("HTTP GET failed; Terminating worker. (%s: unexpected status %q)", link, resp.Status)
		return
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Printf("Reading body failed; Terminating worker. (%s: %v)", link, err)
		return
	}

	for _, line := range strings.Split(string(body), "\n") {
		// TODO: Parallelize this as well. But not high prio... :)
		validatedIP, err := extractIPsAndNetworks(line)
		if err != nil {
			if debug {
				log.Println(err)
			}
			continue
		}

		ipAddresses <- validatedIP
	}
}