From 73c9919c2a47dff1ab55367e7efeded0a99a9be6 Mon Sep 17 00:00:00 2001 From: Piotr Machura Date: Tue, 24 Jan 2023 11:30:47 +0100 Subject: [PATCH] Prototype UDP tracker support --- poetry.lock | 14 +++- pyproject.toml | 1 + torrentmap/client.py | 164 +++++++++++++++++++++++++++++++++++------- torrentmap/torrent.py | 3 +- 4 files changed, 155 insertions(+), 27 deletions(-) diff --git a/poetry.lock b/poetry.lock index d1f5a59..0dd846f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -20,6 +20,18 @@ wrapt = [ {version = ">=1.14,<2", markers = "python_version >= \"3.11\""}, ] +[[package]] +name = "bitstring" +version = "4.0.1" +description = "Simple construction, analysis and modification of binary data." +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "bitstring-4.0.1-py3-none-any.whl", hash = "sha256:4a27cdefd95eb535c4b79e0afcdb5532ba1dba0aaed98a31ad98f46b1e0d5bd9"}, + {file = "bitstring-4.0.1.tar.gz", hash = "sha256:7719f08f6df89ce28453a5e580d4a8ec1d1bda892dbb033466da0ccd9bdcb706"}, +] + [[package]] name = "black" version = "22.12.0" @@ -585,4 +597,4 @@ files = [ [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "363d6530fa9aaadaa900b05a67cdd820b4d33b24d45d59f78e231827ef57d350" +content-hash = "fbb7bc3aeda70ec899f6aa857d892e20d0452a2850f9e1523677fd34abc4cc35" diff --git a/pyproject.toml b/pyproject.toml index 8b9cc55..3c34857 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,6 +10,7 @@ readme = "README.md" python = "^3.10" requests = "^2.28.2" libtorrent = "^2.0.7" +bitstring = "^4.0.1" [tool.poetry.group.dev.dependencies] black = "^22.12.0" diff --git a/torrentmap/client.py b/torrentmap/client.py index e7ed0f4..f52c4ae 100644 --- a/torrentmap/client.py +++ b/torrentmap/client.py @@ -2,39 +2,35 @@ Implements 'raw' UDP/HTTP bittorrent announce client to communicate with trackers and obtain a list of peers. 
+ +For details on UDP tracker protocol, see +https://www.bittorrent.org/beps/bep_0015.html + +For details on HTTP(S) tracker protocol, see +https://wiki.theory.org/BitTorrentSpecification#Tracker_HTTP.2FHTTPS_Protocol """ import binascii import logging as log -import random +import socket +from ipaddress import ip_address from os import urandom +from urllib.parse import urlparse import libtorrent as lt import requests - -# Protocol says to keep it that way (https://www.bittorrent.org/beps/bep_0015.html) -PROTOCOL_ID = 0x41727101980 - -# Scrape response offset, first 8 bytes (4 bytes action, 4 bytes connection_id) -OFFSET = 8 +from bitstring import BitArray, pack # UDP Packet Buffer Size -PACKET_BUFFER_SIZE = 512 - -# Peer ID is random 20 bytes -PEER_ID = urandom(20) +BUFFER_SIZE = 512 -# Port announced to trackers - will not actuall be used -PORT = random.randint(6881, 6889) - - -def border_left(i: int) -> int: - """Scrapre response start infohash data.""" - return OFFSET + (i * 12) - 12 +# Port announced to trackers - will not actually be used +PORT = 6888 +# Peer ID - random 20 bytes +PEER_ID = urandom(20) -def border_right(i: int) -> int: - """Scrapre response end infohash data.""" - return OFFSET + (i * 12) +# Key - random 4 byte integer +KEY = int.from_bytes(urandom(4), "big", signed=True) def get_ips(tracker_url: str, info_hash: str) -> set: @@ -44,7 +40,8 @@ def get_ips(tracker_url: str, info_hash: str) -> set: (strings) of peers provided by the tracker. 
""" peers = set() - if tracker_url.startswith("http"): + url = urlparse(tracker_url) + if url.scheme in ["http", "https"]: # HTTP params = { "info_hash": binascii.unhexlify(info_hash), @@ -55,7 +52,7 @@ def get_ips(tracker_url: str, info_hash: str) -> set: "left": 1, "numwant": 50, } - response = requests.get(tracker_url, params=params, timeout=10) + response = requests.get(tracker_url, params=params, timeout=30) decoded = lt.bdecode(bytes(response.text, response.encoding)) for peer in decoded[b"peers"]: peers.add(peer[b"ip"].decode("utf-8")) @@ -65,8 +62,125 @@ def get_ips(tracker_url: str, info_hash: str) -> set: tracker_url, info_hash, ) - elif tracker_url.startswith("udp"): - log.warning("%s - UDP trackers not impelmented yet, skipping", tracker_url) + elif url.scheme == "udp": + # Create UDP socket and connect + sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + sock.connect((url.hostname, url.port)) + + # transaction ID - random 32bit (4 byte) integer + transaction_id = int.from_bytes(urandom(4), "big", signed=True) + + # Establish a session with the server + payload = pack( + ", ".join( + [ + "int64", # Magic number + "int32", # action + "int32", # transaction id + ] + ), + 0x41727101980, + 0, # connection request + transaction_id, + ) + sock.send(payload.tobytes()) + + response = BitArray(sock.recv(16)) + ( # pylint: disable=unbalanced-tuple-unpacking + action, + response_transaction_id, + con_id, + ) = response.unpack( + ", ".join( + ( + "int32", # action (should be 0) + "int32", # transaction id (should mach the one we sent) + "int64", # connection id + ) + ) + ) + + if action != 0: + log.warning( + "%s did not respond with 0 (establish action), skipping", tracker_url + ) + if response_transaction_id != transaction_id: + log.warning( + "recieved incorrect transaction ID from %s, skipping", tracker_url + ) + return peers + log.info("completed UDP handshake with %s, asking for peers", tracker_url) + + # Use connection id provided by the server to 
announce and request a list of + # peers + payload = pack( + ", ".join( + ( + "int64", # connection id + "int32", # action + "int32", # transaction id + "bits160", # infohash + "bits160", # peer id + "int64", # downloaded + "int64", # left + "int64", # uploaded + "int32", # event + "int32", # IP address + "int32", # key + "int32", # num_want + "int16", # port + ) + ), + con_id, + 1, # announce + transaction_id, + binascii.unhexlify(info_hash), + PEER_ID, + 1, + 1, + 0, + 0, # event: none + 0, # IP address - not necessary + KEY, + 50, # numwant + PORT, + ) + sock.send(payload.tobytes()) + + response = BitArray(sock.recv(BUFFER_SIZE)) + ( # pylint: disable=unbalanced-tuple-unpacking + action, + response_transaction_id, + _interval, + _leechers, + _seeders, + ip, + _port, + ) = response.unpack( + ", ".join( + [ + "int32", # action (should be 1) + "int32", # transaction id (should match the one we sent) + "int32", # interval + "int32", # leechers + "int32", # seeders + "uint32", # IP address + "uint16", # port + ] + ) + ) + if action != 1: + log.warning( + "%s did not respond with 1 (announce action), skipping", tracker_url + ) + if response_transaction_id != transaction_id: + log.warning( + "recieved incorrect transaction ID from %s, skipping", tracker_url + ) + return peers + log.info("got %i (%s) as IP from %s", ip, ip_address(ip), tracker_url) + + else: log.error("%s - unknown tracker type, skipping", tracker_url) return peers diff --git a/torrentmap/torrent.py b/torrentmap/torrent.py index ce4cd19..823a058 100644 --- a/torrentmap/torrent.py +++ b/torrentmap/torrent.py @@ -52,6 +52,7 @@ class Torrent: Uses `https://db-ip.com` API for geolocation. 
""" + # IPs of not-yet-geolocated peers addresses = list( map( lambda peer: peer.ip, @@ -66,7 +67,7 @@ class Torrent: while len(chunk) != 0: query = "https://api.db-ip.com/v2/free/" query += ",".join(chunk) - response = requests.get(query, timeout=600).json() + response = requests.get(query, timeout=30).json() for ip, peer in self.peers.items(): geo = response.get(ip) if geo is not None: -- 2.45.2