mongodb rust binary bson raw

Reading .bson file with Rust


I was very curious about the .bson binary file produced by mongodump. At first, my guess was that it was a BSON array, i.e. Bson::Array(array_of_documents). So I tried:

#Cargo.toml

[dependencies]
...
bson = "2.1.0"
//main.rs
fn main()
{
    let mut path=std::env::current_dir().unwrap();
    path.push("backup/transactions.bson");

    if path.exists() {
        match std::fs::File::open(&path)
        {
            Err(err)=>panic!("Failed to open '{}'",path.to_str().unwrap(),err),
            Ok(file)=>{
                if let Ok(bson_file)=bson::Bson::from_reader(&file) {
                    if let Some(bson_array)=bson_file.as_array() {
                        for bson_data in bson_array {
                            println!("BSON Data: {:?}",&bson_data);
                        }
                    }  else {
                        println!("Not an array");
                    }
                } else {
                    println!("Not a BSON file");
                }
            }
        }
    }
}

Then I ran:

$ cargo run
...
Not an array

Clearly a BSON file, but not an array! What was it then? 🤔


Solution

  • Then I thought.. what if it was a Document Bson::Document(bson_data)?

    //main.rs
    fn main()
    {
        let mut path=std::env::current_dir().unwrap();
        path.push("backup/transactions.bson");
    
        if path.exists() {
            match std::fs::File::open(&path)
            {
                Err(err)=>panic!("Failed to open '{}'",path.to_str().unwrap(),err),
                Ok(mut file)=>{
                    if let Ok(doc_data)=bson::Document::from_reader(&mut file) {
                       //Print all keys of the document
                       println!("Document keys: {:?}",doc_data.keys().map(|x| x.as_str()).collect::<Vec<&str>>());
                    } else {
                       println!("Not a document");
                    }
                }
            }
        }
    }
    
    $ cargo run
    ...
    ["_id","code","type","customerName","items","createdAt"]
    

    Those keys are from my Transaction data! 😮 But it only retrieves one document and prints the keys of that document. Where are the rest of the documents? I have more than 50 documents dumped in the .bson file. Then I suspected something: what if a .bson file consists of a series of documents placed one after another?

    //main.rs
    fn main()
    {
        let mut path=std::env::current_dir().unwrap();
        path.push("backup/transactions.bson");
    
        if path.exists() {
            match std::fs::File::open(&path)
            {
                Err(err)=>panic!("Failed to open '{}'",path.to_str().unwrap(),err),
                Ok(mut file)=>{
                    //Read file size
                    let n=file.seek(std::io::SeekFrom::End(0)).unwrap_or(0);
    
                    //Put file pointer at the beginning of file
                    if file.seek(std::io::SeekFrom::Start(0)).is_ok() {
                        //Hold the number of retrieved documents
                        let mut i=0_u16;
    
                        //Read only 50 documents or end of file has been reached
                        while i < 50 && file.stream_position().unwrap_or(0) < n {
                           if let Ok(doc_data) = bson::Document::from_reader(&mut file) {
                               println!("Document: {}. {:?}",i+1,&doc_data);
                               i+=1;
                           }
                        }
                    }
                }
            }
        }
    }
    
    $ cargo run
    ...
    Document: 1. Document({"_id": ObjectId("63ee1b8b4b7c65e95ba891bd"), "originalId": ObjectId("6252fab17b201412088a4aaa"), "client": ObjectId("63d62de44370c4d5f1670f78"), "type": String("room"),...
    ...
    Document: 36. Document({"_id": ObjectId("63ee1b8b4b7c65e95ba891e7"), "originalId": ObjectId("625300cb7b201412088a4d30"), "client": ObjectId("63d62de44370c4d5f1670f78"),...
    

    Yes, my guess was right. A .bson file is a series of Bson::Document(data) values placed one after another.