The problem I am trying to solve is getting feature
data from JSON fields so that resulting column name is preserved. This translates in trying to understand whether there is a better and/or safer way to dynamically define column aliases using psycopg(3).
I currently have the implemented the following solution:
# imports
import psycopg
from psycopg import Connection, sql
from psycopg.rows import dict_row
# constants
project = "project_1"
location = "location_1"
data_table = "table_1"
features = ["feature_1", "feature_2"]
start_dt = "2024-04-22T16:00:00"
end_dt = "2024-04-22T17:00:00"
__user = "My"
__password = "I am supposed to be extra-complicated!"
__host = "localhost"
__database = "db"
__port = 5432
# connection
connection = psycopg.connect(
user=__user,
password=__password,
host=__host,
dbname=__database,
port=__port,
row_factory=dict_row,
)
# Adapted from https://github.com/psycopg/psycopg2/issues/791#issuecomment-429459212
def alias_identifier(
ident: str | tuple[str], alias: str | None = None
) -> sql.Composed:
"""Return a SQL identifier with an optional alias."""
if isinstance(ident, str):
ident = (ident,)
if not alias:
return sql.Identifier(*ident)
# fmt: off
return sql.Composed([sql.Literal(*ident), sql.SQL(" AS "),
sql.Identifier(alias)])
# fmt: on
# source query str
QUERY = """SELECT
current_database() AS project,
timestamp,
location,
feature -> {feature}
FROM {data_table}
WHERE lower(location) = {location}
AND timestamp BETWEEN {start_dt} AND {end_dt}
"""
# SQL query
query = sql.SQL(QUERY).format(
feature=sql.SQL(", feature -> ").join([alias_identifier(m, alias=m) for m in features]),
data_table=sql.Identifier(data_table),
location=sql.Literal(location),
start_dt=sql.Literal(start_dt),
end_dt=sql.Literal(end_dt),
)
print(query.as_string(connection))
SELECT
current_database() AS project,
timestamp,
location,
feature -> 'feature_1' AS "feature_1", feature -> 'feature_2' AS "feature_2"
FROM "table_1"
WHERE lower(location) = 'location_1'
AND timestamp BETWEEN '2024-04-22T16:00:00' AND '2024-04-22T17:00:00'
The solution provides the expected results, although I am wondering whether it violates any of the psycopg guidelines and whether there is a better way to achieve what I want.
In my usage I do:
QUERY = sql.SQL("""SELECT
current_database() AS project,
timestamp,
location,
feature -> %(feature)s
FROM {data_table}
WHERE lower(location) = %(location)s
AND timestamp BETWEEN %(start_dt)s AND %(end_dt)s
""").format(data_table=sql.Identifier(data_table))
and then:
cur.execute(QUERY, {"feature": feature, "location": "somewhere", "start_dt": "2024-02-22", "end_dt": "2024-04-30"})
Then you don't have to rebuild the query for changes in the user supplied arguments(location, start_dt, end_dt).