pythonp2pbittorrentdhttorrent

How to perform a BitTorrent handshake given an infohash and it's peers?


I'd like to continue my last related thread in my attempt to understand and build a BitTorrent search engine. While listening the network for "get_peers" messages, I manage to grab infohashes. I proceed to ask the corresponding DHT node for it's peers. In my understanding in order to find out if the infohash is valid, (for starters) I have to send a BitTorrent handshake to the peers and compare the responses. However, besides the connection refused errors which I ignore for now, most peers reply with empty responses. Am I doing something wrong here? Note that the following code samples are not a great implementation, I just want to understand the flow.

Handshake function:


import socket 

def handshake(infohash, peer):
    peer_id = b"-TR2940-k8hj0wgej6ch"

    handshake = b'\x13'
    handshake += b'BitTorrent protocol'
    handshake += b'\x00\x00\x00\x00\x00\x10\x00\x00'
    handshake += infohash
    handshake += peer_id

    try:
        ClientSocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        ClientSocket.settimeout(3)
        ClientSocket.connect(peer)
        print("Connected to peer.")
        ClientSocket.sendall(handshake)
        response = ClientSocket.recv(68)

        if not response:
            print("Empty response.")
            return
        print(f"Handshake completed, resp: {response}")

        ClientSocket.close()        
    except Exception as e:
        print(e)

Utilities to get peers from given infohash and DHT node:

import random
import uuid
import bencode
import socket
from struct import unpack
import handshake

def newTID(tidlen):
    tid = ""
    for i in range(0, tidlen):
        tid += chr(random.randint(97, 122))
    return tid

def newID():
  return uuid.uuid4().hex[0:20]

def split_nodes(nodes):
    length = len(nodes)
    if (length % 26) != 0:
        return
    for i in range(0, length, 26):
        nid = nodes[i:i+20]
        ip = socket.inet_ntoa(nodes[i+20:i+24])
        port = unpack("!H", nodes[i+24:i+26])[0]
        yield nid, ip, port

UDPClientSocket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)
UDPClientSocket.settimeout(4)

def get_peers_from_infohash(infohash, node):
    get_peers_query = {"t":"aa", "y":"q", "q":"get_peers", "a": {"id":newID(), "info_hash":infohash}}
    get_peers_query = bencode.encode(get_peers_query)
    
    UDPClientSocket.sendto(get_peers_query, node)

    received = UDPClientSocket.recvfrom(65536)
    msg = received[0]
    decoded = bencode.decode(msg)

    peers = split_nodes(decoded["r"]["nodes"])
    for nid, ip, port in peers:
        print(infohash, infohash.hex(),  ip, port)
        handshake.handshake(infohash, (ip, port))
    

My DHT crawler:


import bencode
import socket
import uuid
from struct import unpack
import threading
import random
import dhtutils

def newTID(tidlen):
    tid = ""
    for i in range(0, tidlen):
        tid += chr(random.randint(97, 122))
    return tid

def newID():
  return uuid.uuid4().hex[0:20]

def handle_message(msg, node):
    if msg.get("e"):
        # print(msg.get("e"))
        pass
    elif msg.get("y") == "r":
       handle_response(msg, node)
    elif msg.get("y") == "q":
        handle_query(msg, node)

def handle_query(msg, node):
    try:
        if msg["q"] == "get_peers":
            infohash = msg["a"]["info_hash"]
            # print(infohash.hex(), msg, node)
            print(infohash.hex())
            dhtutils.get_peers_from_infohash(infohash, node)
    except:
        pass

def handle_response(msg, node):
    global all_nodes
    if msg.get("r").get("nodes"):
        # response from find_nodes
        nodes = msg.get("r").get("nodes")
        if nodes:
            nodes = split_nodes(nodes)
            for id, ip, port in nodes:
                find_nodes(id, (ip, port))
                all_nodes.append((id, (ip, port)))
    elif msg.get("t") == "pg":
        # response from ping
        id = msg["r"]["id"]
        all_nodes.append((id, node))

def split_nodes(nodes):
    length = len(nodes)
    if (length % 26) != 0:
        return
    for i in range(0, length, 26):
        nid = nodes[i:i+20]
        ip = socket.inet_ntoa(nodes[i+20:i+24])
        port = unpack("!H", nodes[i+24:i+26])[0]
        yield nid, ip, port

def find_nodes(id, node):
    global UDPClientSocket
    find_node_query = {"t":newTID(2), "y":"q", "q":"find_node", "a": {"id":newID(), "target":id}}
    find_node_query = bencode.encode(find_node_query)
    UDPClientSocket.sendto(find_node_query, node)

def ping(node):
    global UDPClientSocket
    ping_query = {"t":"pg", "y":"q", "q":"ping", "a":{"id":newID()}}
    ping_query = bencode.encode(ping_query)
    UDPClientSocket.sendto(ping_query, node)

def listen():
    while True:
        try:
            received = UDPClientSocket.recvfrom(65536)
            msg = received[0]
            src = received[1]
            decoded = bencode.decode(msg)
            handle_message(decoded, src)
        except Exception as e:
            pass

UDPClientSocket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_UDP)

T = threading.Thread(target=listen)
T.start()

nodes = [
    ("router.bittorrent.com", 6881),
    ("dht.transmissionbt.com", 6881),
    ("router.utorrent.com", 6881)
]

for node in nodes:
    ping(node)

all_nodes = []

while True:
    if len(all_nodes) > 0:
       for node in all_nodes:
           find_nodes(node[0], node[1])

Solution

  • Your DHT lookup is incorrect since you're looking at the nodes response field which contains DHT nodes used for finding other DHT nodes when performing an iterative lookup in the DHT. It is not the values field that contains bittorrent contacts.

    You're only going to get the latter when you have properly routed to the target region that covers an infohash in the DHT keyspace.