I'm trying to import a SQLite table into a Polars df as follows:
import polars as pl
uri = "sqlite:///tmp/dbname.db"
query = ("SELECT * from alib")
df = pl.read_database_uri(query=query, uri=uri)
The first column in the SQLite table is called __path and its data type is BLOB.
When I try to import it into a Polars df it throws:
Traceback (most recent call last):
File "/home/x/mypy/lib/python3.12/site-packages/polars/io/database/_utils.py", line 54, in _read_sql_connectorx
tbl = cx.read_sql(
^^^^^^^^^^^^
File "/home/x/mypy/lib/python3.12/site-packages/connectorx/__init__.py", line 386, in read_sql
result = _read_sql(
^^^^^^^^^^
RuntimeError: Invalid column type Text at index: 0, name: __path
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "/home/x/mypy/lib/python3.12/site-packages/polars/io/database/functions.py", line 396, in read_database_uri
return _read_sql_connectorx(
^^^^^^^^^^^^^^^^^^^^^
File "/home/x/mypy/lib/python3.12/site-packages/polars/io/database/_utils.py", line 66, in _read_sql_connectorx
raise type(err)(errmsg) from err
RuntimeError: Invalid column type Text at index: 0, name: __path
query = ("SELECT * from alib")
df = pl.read_database_uri(query=query, uri=uri)
Traceback (most recent call last):
File "/home/x/mypy/lib/python3.12/site-packages/polars/io/database/_utils.py", line 54, in _read_sql_connectorx
tbl = cx.read_sql(
^^^^^^^^^^^^
File "/home/x/mypy/lib/python3.12/site-packages/connectorx/__init__.py", line 386, in read_sql
result = _read_sql(
^^^^^^^^^^
RuntimeError: Invalid column type Text at index: 0, name: __path
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<input>", line 1, in <module>
File "/home/x/mypy/lib/python3.12/site-packages/polars/io/database/functions.py", line 396, in read_database_uri
return _read_sql_connectorx(
^^^^^^^^^^^^^^^^^^^^^
File "/home/x/mypy/lib/python3.12/site-packages/polars/io/database/_utils.py", line 66, in _read_sql_connectorx
raise type(err)(errmsg) from err
RuntimeError: Invalid column type Text at index: 0, name: __path
Does Polars not handle SQLite BLOB fields at all or is there something I need to do to get working?
Schema as requested:
CREATE TABLE alib (
__path BLOB UNIQUE,
__filename BLOB,
__dirpath BLOB,
__filename_no_ext BLOB,
__ext BLOB,
__accessed TEXT,
__app TEXT,
__bitrate TEXT,
__bitspersample TEXT,
__bitrate_num TEXT,
__frequency_num TEXT,
__frequency TEXT,
__channels TEXT,
__created TEXT,
__dirname TEXT,
__file_access_date TEXT,
__file_access_datetime TEXT,
__file_access_datetime_raw TEXT,
__file_create_date TEXT,
__file_create_datetime TEXT,
__file_create_datetime_raw TEXT,
__file_mod_date TEXT,
__file_mod_datetime TEXT,
__file_mod_datetime_raw TEXT,
__file_size TEXT,
__file_size_bytes TEXT,
__file_size_kb TEXT,
__file_size_mb TEXT,
__filetype TEXT,
__image_mimetype TEXT,
__image_type TEXT,
__layer TEXT,
__length TEXT,
__length_seconds TEXT,
__mode TEXT,
__modified TEXT,
__num_images TEXT,
__parent_dir TEXT,
__size TEXT,
__tag TEXT,
__tag_read TEXT,
__version TEXT,
__vendorstring TEXT,
__md5sig TEXT,
tagminder_uuid BLOB,
sqlmodded TEXT,
reflac TEXT,
discnumber TEXT,
track TEXT,
title TEXT,
subtitle TEXT,
work TEXT,
part TEXT,
live TEXT,
composer TEXT,
arranger TEXT,
lyricist TEXT,
writer TEXT,
artist TEXT,
performer TEXT,
personnel TEXT,
conductor TEXT,
engineer TEXT,
producer TEXT,
mixer TEXT,
remixer TEXT,
albumartist TEXT,
discsubtitle TEXT,
album TEXT,
version TEXT,
releasetype TEXT,
year TEXT,
originaldate TEXT,
originalreleasedate TEXT,
originalyear TEXT,
genre TEXT,
style TEXT,
mood TEXT,
theme TEXT,
rating TEXT,
compilation TEXT,
bootleg TEXT,
label TEXT,
musicbrainz_albumartistid TEXT,
musicbrainz_albumid TEXT,
musicbrainz_artistid TEXT,
musicbrainz_composerid TEXT,
musicbrainz_discid TEXT,
musicbrainz_producerid TEXT,
musicbrainz_releasegroupid TEXT,
musicbrainz_releasetrackid TEXT,
musicbrainz_trackid TEXT,
musicbrainz_workid TEXT,
lyrics TEXT,
unsyncedlyrics TEXT,
performancedate TEXT,
acoustid_fingerprint TEXT,
acoustid_id TEXT,
analysis TEXT,
asin TEXT,
barcode TEXT,
catalog TEXT,
catalognumber TEXT,
country TEXT,
discogs_artist_url TEXT,
discogs_release_url TEXT,
fingerprint TEXT,
isrc TEXT,
recordinglocation TEXT,
recordingstartdate TEXT,
replaygain_album_gain TEXT,
replaygain_album_peak TEXT,
replaygain_track_gain TEXT,
replaygain_track_peak TEXT,
review TEXT,
upc TEXT,
musicip_data TEXT,
type TEXT,
musicip_puid TEXT,
movement TEXT
);
You have invalid (likely text) data in your BLOB column. SQLite (unfortunately) allows mixed types in columns. To prevent this I would strongly suggest to only use SQLite with STRICT tables.
Look at the following example, t1 only contains BLOB values but t2 contains a mix of BLOB and TEXT values, t3 rejects this because it is strict:
sqlite> .open tmp.db
sqlite> CREATE TABLE t1 (b BLOB);
sqlite> CREATE TABLE t2 (b BLOB);
sqlit3> CREATE TABLE t3 (b BLOB) STRICT;
sqlite> INSERT INTO t1 VALUES (x'0001');
sqlite> INSERT INTO t2 VALUES (x'0001'), ('text');
sqlite> INSERT INTO t3 VALUES (x'0001'), ('text');
Runtime error: cannot store TEXT value in BLOB column t3.b (19)
sqlite> .exit
>>> import polars as pl
>>> pl.read_database_uri(query="SELECT * FROM t1", uri="sqlite://tmp.db")
shape: (1, 1)
┌─────────────┐
│ b │
│ --- │
│ binary │
╞═════════════╡
│ b"\x00\x01" │
└─────────────┘
>>> pl.read_database_uri(query="SELECT * FROM t2", uri="sqlite://tmp.db")
Traceback (most recent call last):
File "/Users/orlp/programming/rust/polars/py-polars/polars/io/database/_utils.py", line 54, in _read_sql_connectorx
tbl = cx.read_sql(
^^^^^^^^^^^^
File "/Users/orlp/programming/rust/polars/.venv/lib/python3.11/site-packages/connectorx/__init__.py", line 386, in read_sql
result = _read_sql(
^^^^^^^^^^
RuntimeError: Invalid column type Text at index: 0, name: b
You can fix it by casting the column to blob, which then will get read as binary:
>>> pl.read_database_uri(query="SELECT cast(b as BLOB) FROM t2", uri="sqlite://tmp.db")
shape: (2, 1)
┌─────────────────┐
│ CAST(b AS BLOB) │
│ --- │
│ binary │
╞═════════════════╡
│ b"\x00\x01" │
│ b"text" │
└─────────────────┘