rust bittorrent

Getting info hash from .torrent file


I don't understand how to SHA-1 hash the info dictionary of a .torrent file in Rust. I know I can't extract the info dictionary directly as a string because it contains invalid UTF-8 bytes, so I use serde_bencode to extract that info.


/// The `info` dictionary of a .torrent file, as deserialized through serde.
#[derive(Deserialize,Clone,Debug)]
struct Info {
    name:String, // Name of the torrent file
    #[serde(rename = "piece length")]
    length:usize, // Size of each piece in bytes (read from the "piece length" key)
    pieces:ByteBuf, // Raw SHA1 hashes of the pieces
    #[serde(flatten)]
    keys:Keys // Either `length` for a single-file torrent or `files` for a multi-file torrent
}

/// A parsed .torrent file: the tracker URL plus the `info` dictionary in both
/// bencoded and typed form.
#[derive(Clone,Debug)]
struct Torrent {
    announce:String, // Value of the top-level "announce" key
    raw_info:Vec<u8>, // Bencoded bytes of the `info` dictionary; this is what gets SHA-1 hashed
    info:Info, // Typed view of the same `info` dictionary
}


/// The part of the `info` dictionary that differs between single-file and
/// multi-file torrents. Untagged: the variant is picked by which key is present.
#[derive(Deserialize,Clone,Debug)]
#[serde(untagged)]
enum Keys{

    // Single-file torrent: the dictionary carries a `length` key.
    SingleFile {
        length:usize
    },
    // Multi-file torrent: the dictionary carries a `files` list.
    MultiFile {
        files:Vec<File>
    }
}


/// One entry of the `files` list of a multi-file torrent.
#[derive(Deserialize,Clone,Debug)]
struct File{
    length:usize, // File size in bytes
    path:Vec<String> // File path split into its directory components
}


impl<'de> Deserialize<'de> for Torrent {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let mut map = BTreeMap::<String, Value>::deserialize(deserializer)?;

        // Extract announce
        let announce_value = map
            .remove("announce")
            .ok_or_else(|| serde::de::Error::missing_field("announce"))?;

        let announce = match announce_value {
            Value::Bytes(bytes) => String::from_utf8(bytes)
                .map_err(|e| serde::de::Error::custom(e.to_string()))?,
            _ => return Err(serde::de::Error::custom("announce must be a bencoded string")),
        };

        // Extract raw info as bencoded dictionary (not bytes)
        let raw_info_value = map
            .remove("info")
            .ok_or_else(|| serde::de::Error::missing_field("info"))?;

        // Debugging raw_info_value to see the structure
        //println!("Raw info value: {:?}", raw_info_value);

        let raw_info_bytes = match raw_info_value {
            Value::Dict(_) => {
                 // Serialize the `info` dictionary to bencoded bytes
                 serde_bencode::to_bytes(&raw_info_value)
                 .map_err(|e| serde::de::Error::custom(format!("Bencode serialization error: {}", e)))?
            }
            _ => return Err(serde::de::Error::custom("info must be a bencoded dictionary")),
        };

        //println!("raw_info_byts: {:?}",raw_info_bytes);

        // Return the Torrent struct with the correct info_hash
        let info: Info = serde_bencode::from_bytes(&raw_info_bytes)
            .map_err(|e| serde::de::Error::custom(format!("Bencode deserialization error: {}", e)))?;


        Ok(Torrent {
            announce,
            raw_info: raw_info_bytes,
            info,
        })
    }
}

From this I can get the info about the torrent, but getting the correct info hash has been a struggle, as I can't make the tracker GET request without the correct info hash. Here is how I have tried it:

/// Reads the torrent file at `path` and builds the tracker request for it.
///
/// The info hash is the SHA-1 of `Torrent::raw_info`, URL-encoded through
/// `url_encode_info_hash`. The raw bytes, the hash and its encoded form are
/// printed to stdout along the way. Peer id, port and the transfer counters
/// are fixed values.
///
/// # Panics
///
/// Panics if the file cannot be read or cannot be parsed as a torrent.
fn makequery(path: String) -> TorrentGetRequest {
    let file_bytes = std::fs::read(path).expect("Failed to read torrent file");
    let torrent: Torrent = serde_bencode::from_bytes(&file_bytes).expect("Parsing failed");

    println!("info hash raw:{:?}", torrent.raw_info);
    let digest = compute_info_hash(&torrent.raw_info);
    println!("compute info hash:{:?}", digest);
    let escaped_digest = url_encode_info_hash(&digest);
    println!("econded_info_hash:{:?}", escaped_digest);

    TorrentGetRequest {
        url: torrent.announce,
        info_hash: escaped_digest,
        peer_id: "11111222223333344444".to_string(),
        port: 6881,
        uploaded: 0,
        dowloaded: 0,
        left: 0,
        compact: 1,
    }
}

/// Returns the 20-byte SHA-1 digest of `raw_info`.
///
/// Also prints the digest as a hex string to stdout.
pub fn compute_info_hash(raw_info: &[u8]) -> [u8; 20] {
    let digest = Sha1::digest(raw_info);

    // Copy the digest bytes into a fixed-size array.
    let mut out = [0u8; 20];
    for (dst, src) in out.iter_mut().zip(digest.iter()) {
        *dst = *src;
    }

    println!("Computed Info Hash (Hex): {}", hex::encode(out));
    out
}
/// Encodes `info_hash` with `form_urlencoded::byte_serialize` and returns the
/// result as a single `String`.
fn url_encode_info_hash(info_hash: &[u8]) -> String {
    let mut encoded = String::new();
    encoded.extend(form_urlencoded::byte_serialize(info_hash));
    encoded
}

Along with my terminal output:

info hash raw:[100, 54, 58, 108, 101, 110, 103, 116, 104, 105, 57, 50, 48, 54, 51, 101, 52, 58, 110, 97, 109, 101, 49, 48, 58, 115, 97, 109, 112, 108, 101, 46, 116, 120, 116, 49, 50, 58, 112, 105, 101, 99, 101, 32, 108, 101, 110, 
103, 116, 104, 105, 51, 50, 55, 54, 56, 101, 54, 58, 112, 105, 101, 99, 101, 115, 54, 48, 58, 232, 118, 246, 122, 42, 136, 134, 232, 243, 107, 19, 103, 38, 195, 15, 162, 151, 3, 2, 45, 110, 34, 117, 230, 4, 160, 118, 102, 86, 115, 110, 129, 255, 16, 181, 82, 4, 173, 141, 53, 240, 13, 147, 122, 2, 19, 223, 25, 130, 188, 141, 9, 114, 39, 173, 158, 144, 154, 204, 23, 101]
Computed Info Hash (Hex): d69f91e6b2ae4c542468d1073a71d4ea13879a7f
compute info hash:[214, 159, 145, 230, 178, 174, 76, 84, 36, 104, 209, 7, 58, 113, 212, 234, 19, 135, 154, 127]
econded_info_hash:"%D6%9F%91%E6%B2%AELT%24h%D1%07%3Aq%D4%EA%13%87%9A%7F"
Url request to http://bittorrent-test-tracker.codecrafters.io/announce
info hash: %D6%9F%91%E6%B2%AELT%24h%D1%07%3Aq%D4%EA%13%87%9A%7F
Response: d14:failure reason25:provided invalid infohashe

I am not too sure if it's how I'm deserializing the torrent file. I have seen other people take this same approach, but mine just doesn't work and I'm not sure why.

Edit: I didn't show how I turned it into a URL:

/// Sends the tracker request built by `makequery(path)` and prints the reply.
///
/// All fields, including the already percent-encoded `info_hash`, are passed
/// to reqwest's `query`. On a success status the response body is printed as
/// text; otherwise the status string is handed to `print_decode`.
///
/// # Errors
///
/// Returns an error if sending the request or reading the body fails.
pub async fn print_peers(path: String) -> Result<(), Box<dyn std::error::Error>> {
    let request = makequery(path);
    let client = reqwest::Client::new();

    println!("Url request to {}", request.url);
    println!("info hash: {}", request.info_hash);

    let params: HashMap<&str, String> = HashMap::from([
        ("info_hash", request.info_hash),
        ("peer_id", request.peer_id),
        ("port", request.port.to_string()),
        ("uploaded", request.uploaded.to_string()),
        ("downloaded", request.dowloaded.to_string()),
        ("left", request.left.to_string()),
        ("compact", request.compact.to_string()),
    ]);

    let response = client.get(request.url).query(&params).send().await?;

    // Non-success statuses are reported through `print_decode`, not as an error.
    if !response.status().is_success() {
        print_decode(response.status().to_string());
        return Ok(());
    }

    let body = response.text().await?;
    println!("Response: {}", body);

    Ok(())
}

Thanks to a response, I figured out that I was getting the right info hash, and I even improved how I compute it. My issue was that I wasn't building the right URL for the tracker GET request: I had to URL-encode every param except for the info hash, which is already percent-encoded and must not be encoded a second time. Here is my final solution, which works fine:

/// Sends the tracker request built by `makequery(path)` and prints the raw
/// response bytes.
///
/// Every parameter except `info_hash` is encoded with `serde_urlencoded`.
/// `info_hash` is already percent-encoded by `makequery`, so it is appended to
/// the URL as-is to avoid encoding it a second time.
///
/// # Errors
///
/// Returns an error if encoding the parameters, sending the request, or
/// reading the response body fails.
pub async fn print_peers(path: String) -> Result<(), Box<dyn std::error::Error>> {
    let request = makequery(path);
    let http = Client::new();

    println!("Url request to {}", request.url);
    println!("info hash: {}", request.info_hash);

    // Parameters that still need URL encoding (everything but `info_hash`).
    let params: HashMap<&str, String> = HashMap::from([
        ("peer_id", request.peer_id),
        ("port", request.port.to_string()),
        ("uploaded", request.uploaded.to_string()),
        ("downloaded", request.dowloaded.to_string()),
        ("left", request.left.to_string()),
        ("compact", request.compact.to_string()),
    ]);
    let encoded_params = serde_urlencoded::to_string(&params)?;

    // Append the pre-encoded info hash by hand so it is not escaped again.
    let tracker_url = format!(
        "{}?{}&info_hash={}",
        request.url, encoded_params, request.info_hash
    );
    println!("Final URL: {}", tracker_url);

    let body = http.get(&tracker_url).send().await?.bytes().await?;
    println!("Bencode response: {:?}", body);

    Ok(())
}

/// Reads the torrent file at `path` and builds the tracker request for it.
///
/// The info hash is computed over the buffer that `BencodeRef::buffer` returns
/// for the `info` entry, rather than over a re-encoded copy, and is then
/// percent-encoded with the `NON_ALPHANUMERIC` set. Peer id, port and the
/// transfer counters are fixed values.
///
/// # Panics
///
/// Panics if the file cannot be read, is not valid bencode, is not a
/// dictionary, lacks `info` or `announce`, or if `announce` is not a string.
fn makequery(path: String) -> TorrentGetRequest {
    let file_bytes = std::fs::read(path).expect("Failed to read torrent file");

    let decoded = BencodeRef::decode(&file_bytes, BDecodeOpt::default()).unwrap();
    let root = decoded.dict().unwrap();

    // Hash the buffer behind the `info` entry.
    let info_entry = root.lookup("info".as_bytes()).unwrap();
    let info_bytes = BencodeRef::buffer(info_entry);

    let announce = root
        .lookup("announce".as_bytes())
        .unwrap()
        .str()
        .unwrap()
        .to_string();

    let digest = compute_info_hash(info_bytes);

    // Percent-encode every byte of the hash that is not alphanumeric.
    let escaped_digest = percent_encode(&digest, NON_ALPHANUMERIC).to_string();

    TorrentGetRequest {
        url: announce,
        info_hash: escaped_digest,
        peer_id: "11111222223333344444".to_string(),
        port: 6881,
        uploaded: 0,
        dowloaded: 0,
        left: 0,
        compact: 1,
    }
}

Solution

  • BEP3 explains that the decode-encode roundtrip may yield incorrect results if your parser isn't validating and your inputs violate some requirements.

    You could try bip-bencode which provides a decoder that provides access to the raw bytes.

    TorrentGetRequest

    You're not showing how that gets turned into a URL.

    but getting the correct info hash has been a struggle as i cant make torrent get request

    Rather than trying to make a request you should test against a bunch of torrent files with already-known infohashes. It cuts out a remote system you don't control from your testing.