I have an endpoint that exports the report as a CSV.
import io, csv

from flask import Flask, Response

app = Flask(__name__)


def get_report_rows():
    # imagine 5 million DB rows here
    for i in range(5_000_000):
        yield (i, f"name-{i}", i % 100)


@app.get("/export")
def export():
    rows = get_report_rows()
    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerow(["id", "name", "score"])
    writer.writerows(rows)
    output.seek(0)
    return Response(
        output.getvalue(),
        mimetype="text/csv",
        headers={"Content-Disposition": "attachment; filename=report.csv"}
    )
For large datasets this endpoint runs out of memory and crashes, because the entire CSV is built up in the StringIO before the response is returned. How can I rewrite the endpoint so the CSV is streamed to the client row by row?
from flask import Flask, Response, stream_with_context
import csv, io

app = Flask(__name__)


def get_report_rows():
    for i in range(5_000_000):
        yield i, f"name-{i}", i % 100


@app.get("/export")
def export_csv():
    def generate():
        # One small reusable buffer: write a row into it, yield its contents,
        # then empty it again, so memory use stays constant.
        buf = io.StringIO()
        writer = csv.writer(buf)

        writer.writerow(("id", "name", "score"))
        yield buf.getvalue()
        buf.seek(0)
        buf.truncate(0)

        for row in get_report_rows():
            writer.writerow(row)
            yield buf.getvalue()
            buf.seek(0)
            buf.truncate(0)

    headers = {
        "Content-Disposition": "attachment; filename=report.csv",
        # Ask nginx (if it proxies the app) not to buffer the response.
        "X-Accel-Buffering": "no",
    }
    return Response(
        # stream_with_context keeps the request context alive while the
        # generator is consumed.
        stream_with_context(generate()),
        mimetype="text/csv",
        headers=headers,
        # Pass the iterator straight through to the WSGI server.
        direct_passthrough=True,
    )
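To confirm that rows really arrive incrementally rather than all at once, hit the endpoint with a streaming client. A minimal sketch, assuming the requests package is installed and the dev server is running on localhost:5000:

import requests

# stream=True tells requests to hand chunks over as the server sends them
with requests.get("http://localhost:5000/export", stream=True) as resp:
    resp.raise_for_status()
    for i, line in enumerate(resp.iter_lines(decode_unicode=True)):
        print(line)   # rows print as they are produced, not after the export finishes
        if i >= 5:    # stop after a few rows for the demo
            break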
Why memory stays flat: we reuse a single tiny io.StringIO buffer and seek/truncate it after every yield, so it never holds more than one row no matter how many rows are streamed. The Content-Disposition: attachment header makes the browser offer a file-save dialog and name the download report.csv, and X-Accel-Buffering: no stops an nginx reverse proxy from buffering the whole response before forwarding it.
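If you would rather skip the seek/truncate bookkeeping, another common pattern (the one the Django docs use for streaming large CSV files) is a tiny pseudo-file whose write() simply returns the value it was given; csv.writer.writerow() returns whatever write() returns, so the formatted line can be yielded directly. A minimal sketch, assuming the app and get_report_rows() defined above are in scope (the /export-echo route name is just for illustration):

import csv

from flask import Response, stream_with_context


class Echo:
    # Pseudo file object: csv.writer only needs a write() method;
    # nothing is stored, the formatted line is just handed back.
    def write(self, value):
        return value


@app.get("/export-echo")
def export_csv_echo():
    def generate():
        writer = csv.writer(Echo())
        # writerow() returns the return value of Echo.write(),
        # i.e. the CSV-formatted line, which we yield as-is.
        yield writer.writerow(("id", "name", "score"))
        for row in get_report_rows():
            yield writer.writerow(row)

    return Response(
        stream_with_context(generate()),
        mimetype="text/csv",
        headers={"Content-Disposition": "attachment; filename=report.csv"},
    )

Either way the response body is a generator, so Flask sends it as a chunked stream and the full CSV never has to fit in memory.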