python firebase google-cloud-firestore

Is It Safe to Use stream() Inside a Firestore Transaction in Python?


While conducting a code review today, multiple AIs advised against using the stream() function within a Firestore transaction in Python.

However, I couldn't find any mention of this limitation in the official documentation:

https://firebase.google.com/docs/firestore/manage-data/transactions#python

I also tested the following code, and it runs without errors or crashes.

Could someone clarify whether this AI-generated advice is still valid or outdated?

Is there an undocumented limitation I should be aware of?

Thanks!

def update_markdown(
    transaction: firestore.Transaction,
    note_ref: firestore.DocumentReference,
    markdown: str,
    existing_chunk_refs=None,
):
    """Replace the note's stored markdown chunks with chunks of ``markdown``.

    Every existing document of the note's ``markdowns`` subcollection is
    queued for deletion on ``transaction``. ``markdown`` is then split with
    ``utils.chunk_text`` and one new document per chunk is queued, with the
    chunk position (starting at 1) stored in the ``order`` field. The writes
    are only queued here; they take effect when the caller's transaction
    commits.

    Args:
        transaction: Transaction on which the deletes and sets are queued.
        note_ref: Note document that owns the ``markdowns`` subcollection.
        markdown: Full markdown text to store.
        existing_chunk_refs: Optional iterable of document references to
            delete. Callers that need the deletes to be based on a
            transactional read should list the subcollection themselves with
            ``stream(transaction=transaction)`` before queuing any write on
            the transaction, and pass the resulting references here. When
            omitted, the subcollection is listed with a plain query, as this
            function has always done.
    """
    # Reference to the markdown chunk subcollection
    markdown_ref = note_ref.collection('markdowns')

    if existing_chunk_refs is None:
        # NOTE(review): this listing does NOT go through `transaction`. It is
        # therefore not protected by the transaction (a concurrent change to
        # the chunks will not trigger a retry) and it is executed again on
        # every retry of the transaction. It cannot simply be switched to
        # `stream(transaction=transaction)` here, because Firestore requires
        # transactional reads to happen before any write is queued and this
        # function cannot know what the caller has already queued. Prefer
        # passing `existing_chunk_refs`.
        existing_chunk_refs = [
            markdown_ref.document(doc.id) for doc in markdown_ref.stream()
        ]

    # Remove existing chunks within the transaction
    for doc_ref in existing_chunk_refs:
        transaction.delete(doc_ref)

    chunks = utils.chunk_text(markdown, MAX_CHUNK_SIZE)

    # Write each chunk to the subcollection within the transaction
    for i, chunk in enumerate(chunks):
        chunk_doc_ref = markdown_ref.document()
        transaction.set(chunk_doc_ref, {
            'text': chunk,
            'order': i + 1
        })

# Upper bound on the accepted markdown length, in characters (512 * 1024).
_MAX_MARKDOWN_LENGTH = 524288


def _invalid_request_response() -> https_fn.Response:
    """Build the 400 response returned for every malformed request body."""
    return https_fn.Response(
        json.dumps({"error": "Invalid request data"}),
        status=400
    )


def _is_valid_document_id(value) -> bool:
    """Return True if ``value`` is a non-empty string without a '/'."""
    return isinstance(value, str) and value != '' and '/' not in value


@https_fn.on_request(
    cors=options.CorsOptions(
        cors_origins=["*"],
        cors_methods=["POST"],
    )
)
def edit_markdown(req: https_fn.Request) -> https_fn.Response:
    """HTTP endpoint that replaces the markdown of one of a user's notes.

    Expects a POST whose JSON body has the shape
    ``{"data": {"uid": str, "doc_id": str, "markdown": str}}``.

    Inside a single Firestore transaction the handler compares the new
    markdown with the stored one and, if they differ, maintains the saved
    "original markdown", rewrites the markdown, and updates the note's
    ``modified_timestamp`` and ``synced_timestamp``.

    Returns:
        * 405 with ``{"error": ...}`` for any method other than POST.
        * 400 with ``{"error": "Invalid request data"}`` when the body is
          missing, malformed, has an unusable ``uid``/``doc_id``, or carries
          empty or oversized markdown.
        * 200 with ``{"data": {"modified_timestamp", "synced_timestamp"}}``
          once the transaction has completed. The timestamps are the ones
          computed for this request and are returned even when the markdown
          was unchanged and nothing was written.
        * 500 with ``{"data": {"error": ...}}`` if the transaction raises.
    """
    if req.method != 'POST':
        return https_fn.Response(
            json.dumps({"error": "Only POST requests are allowed"}),
            status=405
        )

    # Validate the shape of the body before indexing into it, so a missing
    # key or a non-object payload yields a 400 instead of an unhandled error.
    request_json = req.get_json(silent=True)
    payload = request_json.get('data') if isinstance(request_json, dict) else None
    if not isinstance(payload, dict):
        return _invalid_request_response()

    # NOTE(review): `uid` is taken from the request body and nothing in this
    # handler authenticates the caller, so any client can address any user's
    # notes. Confirm that authentication happens elsewhere or add it.
    uid = payload.get('uid')
    doc_id = payload.get('doc_id')
    new_markdown = payload.get('markdown')

    # Both IDs become path segments below; an empty value or one containing
    # '/' would not address the intended users/{uid}/notes/{doc_id} document.
    if not (_is_valid_document_id(uid) and _is_valid_document_id(doc_id)):
        return _invalid_request_response()

    if not isinstance(new_markdown, str) or utils.is_none_or_trimmed_empty(new_markdown):
        return _invalid_request_response()

    if len(new_markdown) > _MAX_MARKDOWN_LENGTH:
        return _invalid_request_response()

    db = firestore.client()

    # Prepare timestamp (milliseconds since the epoch)
    current_timestamp = int(time.time() * 1000)

    @firestore.transactional
    def update_note(transaction):
        note_ref = (
            db.collection('users')
            .document(uid)
            .collection('notes')
            .document(doc_id)
        )

        current_markdown = markdown_utils.get_markdown(
            transaction=transaction,
            note_ref=note_ref
        )

        # 1. Nothing to do when the markdown is unchanged
        if new_markdown == current_markdown:
            return

        # 2. Maintain the saved "original markdown"
        original_markdown = markdown_utils.get_original_markdown(
            transaction=transaction,
            note_ref=note_ref
        )

        if new_markdown == original_markdown:
            # 2a. If user reverted back to the original markdown, remove it
            markdown_utils.delete_original_markdown(
                transaction=transaction,
                note_ref=note_ref
            )
        else:
            # 2b. If this is the first time changing away from the original, save it
            markdown_utils.insert_original_markdown_if_not_exist(
                transaction=transaction,
                note_ref=note_ref,
                original_markdown=current_markdown
            )

        # 3. Update markdown
        markdown_utils.update_markdown(
            transaction=transaction,
            note_ref=note_ref,
            markdown=new_markdown
        )

        # 4. Update timestamps
        transaction.update(
            note_ref,
            {
                'modified_timestamp': current_timestamp,
                'synced_timestamp': current_timestamp
            }
        )

    try:
        # Run in a transaction
        update_note(db.transaction())
    except Exception as e:
        # Top-level boundary: log and turn any failure into a 500 response.
        # NOTE(review): the raw exception text is echoed to the client;
        # consider returning a generic message instead.
        print(f"Error updating note markdown: {str(e)}")
        error_message = {
            "data": {
                "error": f"An error occurred: {str(e)}"
            }
        }
        return https_fn.Response(
            json.dumps(error_message),
            status=500
        )

    response_data = {
        "data": {
            "modified_timestamp": current_timestamp,
            "synced_timestamp": current_timestamp
        }
    }
    return https_fn.Response(
        json.dumps(response_data),
        status=200
    )

Solution

  • Inside a transaction, you shouldn't do any reading or writing of the database (including, but not limited to, stream()) that doesn't go through the provided transaction object. That's because 1. transactions should run as fast as possible to avoid contention between multiple concurrent transactions, and 2. a transaction can run multiple times in order to compensate for such contention, so you would end up reading those documents repeatedly and excessively.

    If you have some documents to read for use in a transaction, you should either read them with the transaction object (in Python, for example, markdown_ref.stream(transaction=transaction)) to ensure that any changes to those documents cause the transaction to correctly retry (ensuring the transaction is atomic), or do those reads fully before the transaction starts so that they don't impede the execution of the transaction.