pythongoogle-bigquery

How do I put a primary key constraint in BigQuery using the Python API?


I am creating a BigQuery table using the Python API. I want to make user_id as the primary key. How do I add this constraint using the Python API?

from google.cloud import bigquery
def create_table():
    schema = [
        bigquery.SchemaField("user_id", "STRING", mode="REQUIRED"),
        bigquery.SchemaField("first_name", "STRING"),
        bigquery.SchemaField("last_name", "STRING"),
        bigquery.SchemaField("age", "INTEGER")
    ]
    table = bigquery.Table(table_id, schema=schema)
    table = client.create_table(table)

Solution

  • Quick search shows that v3.11.4 doesn't support tableConstraints option.

    Subclass bigquery.Table and set the tableConstraints.primaryKey field in the API request and you should have your table created with the desired constraints.

    class Table(bigquery.Table):
        def __init__(self, *args, **kwargs):
           super(*args, **kwargs)
    
        def set_primary_key(self, primary_key):
            if 'tableConstraints' not in self._properties:
                self._properties['tableConstraints'] = {}
            self._properties['tableConstraints']['primaryKey'] = primary_key        
    
    
    def create_table(table_id,):
        schema = [
            bigquery.SchemaField("user_id", "STRING", mode="REQUIRED"),
            bigquery.SchemaField("first_name", "STRING"),
            bigquery.SchemaField("last_name", "STRING"),
            bigquery.SchemaField("age", "INTEGER")
        ]
        primary_key={"columns": ["user_id"]}
        table = Table(table_id, schema=schema)
        table.set_primary_key(primary_key)
        table = client.create_table(table)