I have an endpoint that exports the report as a CSV.
import io, csv

from flask import Flask, Response

app = Flask(__name__)


def get_report_rows():
    # imagine 5 million DB rows here
    for i in range(5_000_000):
        yield (i, f"name-{i}", i % 100)


@app.get("/export")
def export():
    rows = get_report_rows()
    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(["id", "name", "score"])
    writer.writerows(rows)
    output.seek(0)
    return Response(
        output.getvalue(),
        mimetype="text/csv",
        headers={"Content-Disposition": "attachment; filename=report.csv"}
    )
For large datasets this endpoint runs out of memory and crashes, because the entire CSV is built up in the StringIO before the response is returned. How can I rewrite the endpoint so the CSV is streamed to the client row by row?
from flask import Flask, Response, stream_with_context
import csv, io

app = Flask(__name__)


def get_report_rows():
    for i in range(5_000_000):
        yield i, f"name-{i}", i % 100


@app.get("/export")
def export_csv():
    def generate():
        # One small reusable buffer: write a row into it, yield its contents,
        # then empty it again, so memory use stays constant.
        buf = io.StringIO()
        writer = csv.writer(buf)

        writer.writerow(("id", "name", "score"))
        yield buf.getvalue()
        buf.seek(0)
        buf.truncate(0)

        for row in get_report_rows():
            writer.writerow(row)
            yield buf.getvalue()
            buf.seek(0)
            buf.truncate(0)

    headers = {
        "Content-Disposition": "attachment; filename=report.csv",
        # Ask nginx (if it proxies the app) not to buffer the response.
        "X-Accel-Buffering": "no",
    }
    return Response(
        # stream_with_context keeps the request context alive while the
        # generator is consumed.
        stream_with_context(generate()),
        mimetype="text/csv",
        headers=headers,
        # Pass the iterator straight through to the WSGI server.
        direct_passthrough=True,
    )
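To confirm that rows really arrive incrementally rather than all at once, hit the endpoint with a streaming client. A minimal sketch, assuming the requests package is installed and the dev server is running on localhost:5000:

import requests

# stream=True tells requests to hand chunks over as the server sends them
with requests.get("http://localhost:5000/export", stream=True) as resp:
    resp.raise_for_status()
    for i, line in enumerate(resp.iter_lines(decode_unicode=True)):
        print(line)   # rows print as they are produced, not after the export finishes
        if i >= 5:    # stop after a few rows for the demo
            break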
Why memory stays flat: we reuse a single tiny io.StringIO buffer and seek/truncate it after every yield, so it never holds more than one row no matter how many rows are streamed. The Content-Disposition: attachment header makes the browser offer a file-save dialog and name the download report.csv, and X-Accel-Buffering: no stops an nginx reverse proxy from buffering the whole response before forwarding it.
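If you would rather skip the seek/truncate bookkeeping, another common pattern (the one the Django docs use for streaming large CSV files) is a tiny pseudo-file whose write() simply returns the value it was given; csv.writer.writerow() returns whatever write() returns, so the formatted line can be yielded directly. A minimal sketch, assuming the app and get_report_rows() defined above are in scope (the /export-echo route name is just for illustration):

import csv

from flask import Response, stream_with_context


class Echo:
    # Pseudo file object: csv.writer only needs a write() method;
    # nothing is stored, the formatted line is just handed back.
    def write(self, value):
        return value


@app.get("/export-echo")
def export_csv_echo():
    def generate():
        writer = csv.writer(Echo())
        # writerow() returns the return value of Echo.write(),
        # i.e. the CSV-formatted line, which we yield as-is.
        yield writer.writerow(("id", "name", "score"))
        for row in get_report_rows():
            yield writer.writerow(row)

    return Response(
        stream_with_context(generate()),
        mimetype="text/csv",
        headers={"Content-Disposition": "attachment; filename=report.csv"},
    )

Either way the response body is a generator, so Flask sends it as a chunked stream and the full CSV never has to fit in memory.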