Using Spark, I can create a delta table with a map column type: MAP<STRING, TIMESTAMP>
How do I create a delta table with a map type without Spark?
I have tried multiple approaches and none of them are working.
import pyarrow as pa
from deltalake import write_deltalake

# Create a sample Arrow Table with a map type
# NOTE(review): this snippet fails (see the ArrowTypeError below). The outer
# pa.array() for "attributes" is handed pre-built Arrow map arrays, but with
# no explicit type it tries to infer one from the elements and ends up
# treating the map entries as plain strings — presumably it expects raw
# Python values (a list of list-of-(key, value)-tuples per row), not nested
# pa.Array objects; confirm against the pyarrow version in use.
data = {
"id": pa.array([1, 2, 3]),
"name": pa.array(["Alice", "Bob", "Charlie"]),
# Each row of a map column should be the raw entries themselves,
# e.g. [("age", 30)], with the map type given once on the outer array.
"attributes": pa.array([
pa.array([("age", 30)], type=pa.map_(pa.string(), pa.int32())),
pa.array([("age", 25)], type=pa.map_(pa.string(), pa.int32())),
pa.array([("age", 35)], type=pa.map_(pa.string(), pa.int32())),
])
}
# Create an Arrow Table
table = pa.Table.from_pydict(data)
# Define the path where the Delta table will be stored
delta_table_path = "./tmp/delta_map"
# Write the Arrow Table to a Delta table
write_deltalake(delta_table_path, data=table, mode="overwrite")
pyarrow throws: pyarrow.lib.ArrowTypeError: Could not convert 'a' with type str: was expecting tuple of (key, value) pair
from deltalake import Schema, Field, DeltaTable, WriterProperties, write_deltalake
from deltalake.schema import PrimitiveType, MapType

# Define the schema for the Delta table
# NOTE(review): this snippet fails (see the NotImplementedError below).
# Passing a deltalake Schema object via schema= triggers the
# "passthrough" conversion path that the library reports as not
# implemented — per the error text, either omit schema= or pass a
# pyarrow schema instead.
schema = Schema([
Field("id",PrimitiveType("string")),
Field("data", MapType("integer", "string", value_contains_null=False))
])
# Create a list of data to write to the Delta table
data = [
{"id": "1", "data": {"key1": "value1", "key2": "value2"}},
{"id": "2", "data": {"key3": "value3", "key4": "value4"}}
]
# Create a Delta table
delta_table = write_deltalake(table_or_uri="./tmp/delta_map", data=data,
schema=schema,mode="append",
writer_properties=WriterProperties(compression="ZSTD")
)
# Write the data to the Delta table
# NOTE(review): even if the call above succeeded, this line looks wrong —
# write_deltalake performs the write itself and returns None, so there is
# no object with a write_data method here; verify against the deltalake
# API docs.
delta_table.write_data(data)
deltalake throws: NotImplementedError: ArrowSchemaConversionMode.passthrough is not implemented to work with DeltaSchema, skip passing a schema or pass an arrow schema.
Thanks in advance for any help.
To create a Delta table with a map column (no Spark needed):
import pyarrow as pa
from deltalake import write_deltalake

# Create a Delta table containing a MAP<INT, STRING> column without Spark.
table_path = "./tmp/my_table"

# One row; the map column is supplied as a plain Python dict and converted
# by pyarrow according to the explicit schema below — do NOT pre-wrap the
# entries in pa.array() calls.
payload = [{"id": 1, "account_id": {17: "100.01.001 Cash"}}]
schema = pa.schema([
    pa.field("id", pa.int32()),
    pa.field("account_id", pa.map_(pa.int32(), pa.string())),
])
table = pa.Table.from_pylist(payload, schema)

# mode="overwrite" replaces the table if it already exists; engine="rust"
# selects the writer that handles map types (the pyarrow writer path was
# the one failing above — confirm against the installed deltalake version).
# NOTE(review): the original call also passed predicate="id = '1'", which
# compares the int32 `id` column against a *string* literal and is not
# needed just to create the table, so it has been dropped.
write_deltalake(table_path, table, mode="overwrite", engine="rust")
To read the Delta table with the map column back:
import duckdb

# Read the Delta table (map column included) back via DuckDB's
# delta_scan table function.
table_path = "./tmp/my_table"
query = f"""SELECT * FROM delta_scan('{table_path}');"""

connection = duckdb.connect()
result = connection.sql(query)
print(result)
connection.close()