python pymongo gridfs

Copy files from gridfs to another gridfs database


I am searching for a way to copy files in gridfs. The idea is to keep all additional attributes and metadata of the file, such as the same "_id".

The use case is to set up a testing database with a fraction of the files in gridfs, while maintaining the references to other collections and documents that are copied.

My attempt in Python is shown below, but it creates a new ObjectId for the inserted file.

import pymongo
import gridfs

...
    fs1 = gridfs.GridFS(database=db1, collection="photos")
    # Look up the source file by the id referenced in photo["binaryId"].
    buffer = fs1.find_one({"_id": photo["binaryId"]})
    fs2 = gridfs.GridFS(database=db2, collection="photos")
    # Only the file object itself is handed to put(); no "_id" or other
    # attributes are passed, so the copy ends up with a new ObjectId.
    fs2.put(buffer)

Update

I found a place where the information is kept.

    # Forward the source file's stored attributes (the private `_file` mapping,
    # presumably including "_id" -- confirm) as keyword arguments to put().
    fs2.put(buffer, **buffer._file)

Solution

  • You can copy all attributes of a gridfs file by creating a new file in the target database and passing all the attributes of the existing file to it, e.g.:

    import io
    import pymongo
    import gridfs
    import mongoengine
    from mongoengine import get_connection, register_connection, Document, StringField, ImageField
    
    
    # Source database: registered under mongoengine's "default" alias.
    register_connection("default", "db1")
    conn1 = get_connection("default")

    # Target database: registered under its own alias, "db2".
    register_connection("db2", "db2")
    conn2 = get_connection("db2")
    
    
    class MyPhoto(Document):
        """Sample document with two string fields and one required image.

        The image field uses the "photos" collection name, which is the same
        collection the GridFS handles in this example are opened on.
        """

        foo = StringField()
        bar = StringField()
        content = ImageField(
            required=True,
            thumbnail_size=None,
            collection_name="photos",
        )
    
    
    # Build the sample document with both string fields set up front.
    myphoto = MyPhoto(foo="foo", bar="bar")

    # Attach the image from disk; the file is closed as soon as it is stored.
    with open("tiny.png", "rb") as fh:
        myphoto.content.put(fh)
    # Alternative for in-memory bytes: myphoto.content.put(io.BytesIO(data))

    myphoto.save()
    
    # Open the "photos" GridFS collection in the source database and load the
    # file that the saved document references.
    fs1 = gridfs.GridFS(database=conn1["db1"], collection="photos")
    fso1 = fs1.find_one({"_id": myphoto.content.grid_id})
    data = fso1.read()
    fs2 = gridfs.GridFS(database=conn2["db2"], collection="photos")

    # Drop any copy left over from an earlier run so the file can be
    # re-created in the target database under the same "_id".
    try:
        fs2.delete(fso1._id)
    except gridfs.errors.NoFile:
        pass
    # Passing the source file's stored attributes as keyword arguments makes
    # the new file carry the same attributes, including "_id".
    with fs2.new_file(**fso1._file) as fh:
        fh.write(data)

    # Re-fetch the ORIGINAL from fs1 (the first handle has already been read
    # to the end) and the copy from fs2, so the checks below really compare
    # the two databases rather than the copy with itself.
    fso1 = fs1.find_one({"_id": myphoto.content.grid_id})
    fso2 = fs2.find_one({"_id": fso1._id})

    # NOTE(review): GridFS appears to stamp "uploadDate" when the new file is
    # closed, so that one attribute is expected to differ between original
    # and copy -- confirm against the installed pymongo version.
    meta1 = {key: value for key, value in fso1._file.items() if key != "uploadDate"}
    meta2 = {key: value for key, value in fso2._file.items() if key != "uploadDate"}

    assert fso1.read() == fso2.read()
    assert meta1 == meta2