I am searching for a way to copy files in GridFS. The idea is to keep all of the file's metadata intact, e.g. the same "_id".
The use case is to set up a testing database containing a fraction of the files in GridFS, while maintaining the references to other collections and documents that are copied.
My attempt in Python was the following, but it already creates a new ObjectId for the inserted file.
import pymongo
import gridfs
...
# Read the source file (a GridOut) from the first database.
fs1 = gridfs.GridFS(database=db1, collection="photos")
grid_out = fs1.find_one({"_id": photo["binaryId"]})
# Writing it into the second database: note that put() assigns a NEW
# ObjectId and does not carry over the original file's metadata.
fs2 = gridfs.GridFS(database=db2, collection="photos")
fs2.put(grid_out)
Update
I found where that information is kept, which allows copying it along:
# GridOut._file holds the raw "files"-collection document of the source file
# (including its _id), so spreading it as keyword arguments makes put()
# reuse the original _id and metadata for the copy.
# NOTE(review): _file is a private pymongo attribute -- confirm it is stable
# across the pymongo versions you target.
fs2.put(buffer, **buffer._file)
You can copy all attributes from GridFS by creating a new file and passing all the attributes of an existing file to it, e.g.:
import io
import pymongo
import gridfs
import mongoengine
from mongoengine import get_connection, register_connection, Document, StringField, ImageField

# Two named connections: "default" -> db1 (source), "db2" -> db2 (destination).
register_connection("default", "db1")
register_connection("db2", "db2")
conn1 = get_connection("default")
conn2 = get_connection("db2")


class MyPhoto(Document):
    """Document whose image content lives in the 'photos' GridFS collection."""
    foo = StringField()
    bar = StringField()
    content = ImageField(required=True, thumbnail_size=None, collection_name="photos")


# Create and save a photo in the source database.
myphoto = MyPhoto()
myphoto.foo = "foo"
myphoto.bar = "bar"
with open("tiny.png", "rb") as fh:
    myphoto.content.put(fh)
    # myphoto.content.put(io.BytesIO(data))
myphoto.save()

# Fetch the stored file (a GridOut) and its raw bytes from db1.
fs1 = gridfs.GridFS(database=conn1["db1"], collection="photos")
fso1 = fs1.find_one({"_id": myphoto.content.grid_id})
data = fso1.read()

# Copy into db2 while preserving the _id and all metadata: delete any stale
# copy first (new_file would otherwise collide on _id), then create a new
# file seeded with the source's files-collection document (GridOut._file).
# NOTE(review): _file is a private pymongo attribute -- verify it is stable
# for the pymongo version in use.
fs2 = gridfs.GridFS(database=conn2["db2"], collection="photos")
try:
    fs2.delete(fso1._id)
except gridfs.errors.NoFile:
    pass
with fs2.new_file(**fso1._file) as fh:
    fh.write(data)

# Verify: re-read the SOURCE from db1 and the COPY from db2 and compare.
# (The original code queried fs2 for both handles, which only compared the
# copy with itself; querying fs1 here makes the check meaningful.)
fso1 = fs1.find_one({"_id": myphoto.content.grid_id})
fso2 = fs2.find_one({"_id": fso1._id})
assert fso1.read() == fso2.read()
assert fso1._file == fso2._file