python flask

How can I stream a large CSV download in Flask without loading the whole file into memory?


I have an endpoint that exports the report as a CSV.

import io, csv
from flask import Flask, Response

app = Flask(__name__)

def get_report_rows():
    """Lazily yield one ``(id, name, score)`` tuple per report row."""
    # imagine 5 million DB rows here
    row_count = 5_000_000
    yield from ((n, f"name-{n}", n % 100) for n in range(row_count))

@app.get("/export")
def export():
    """Build the whole report as one CSV string and return it as a download.

    Every row is written into an in-memory buffer before the response is
    created, so memory use grows with the number of rows.
    """
    buffer = io.StringIO()
    csv_out = csv.writer(buffer)
    csv_out.writerow(["id", "name", "score"])
    for record in get_report_rows():
        csv_out.writerow(record)
    buffer.seek(0)

    # The complete CSV text is materialised here before anything is sent.
    body = buffer.getvalue()
    disposition = {"Content-Disposition": "attachment; filename=report.csv"}
    return Response(body, mimetype="text/csv", headers=disposition)

For large datasets, this endpoint crashes. How can I create the endpoint such that I can stream a CSV row by row?


Solution

  • from flask import Flask, Response, stream_with_context
    import csv, io
    
    app = Flask(__name__)
    
    def get_report_rows():
        """Lazily produce ``(id, name, score)`` tuples for 5,000,000 sample rows."""
        row_id = 0
        while row_id < 5_000_000:
            yield row_id, f"name-{row_id}", row_id % 100
            row_id += 1
    
    @app.get("/export")
    def export_csv():
        """Stream the report as a CSV download, one row per chunk.

        Rows are pulled lazily from ``get_report_rows()`` and formatted
        through a single reusable ``io.StringIO`` buffer, so only one row of
        CSV text is held in memory at a time.

        Returns:
            A streaming ``Response`` with mimetype ``text/csv`` and an
            attachment ``Content-Disposition`` header.
        """
        def generate():
            """Yield the header line, then every data row, as CSV text."""
            buf = io.StringIO()
            writer = csv.writer(buf)

            def drain():
                """Return the buffered text and reset the buffer to empty."""
                chunk = buf.getvalue()
                buf.seek(0)
                buf.truncate(0)
                return chunk

            writer.writerow(("id", "name", "score"))
            yield drain()

            for row in get_report_rows():
                writer.writerow(row)
                yield drain()

        headers = {
            "Content-Disposition": "attachment; filename=report.csv",
            # Asks an nginx reverse proxy (if any) not to buffer the stream.
            "X-Accel-Buffering": "no",
        }
        # direct_passthrough is deliberately NOT set: generate() yields str,
        # and passthrough would hand those chunks to the WSGI server without
        # encoding them to bytes. Leaving it off lets the response encode
        # each chunk as it is streamed.
        return Response(
            stream_with_context(generate()),
            mimetype="text/csv",
            headers=headers,
        )
    

    We reuse one small io.StringIO() buffer: after every yield we seek back to the start and truncate it. This keeps memory use from growing no matter how many rows are streamed.

    The Content-Disposition header makes the browser show a file-save dialog.