I want to concatenate columns and get results like the concat_columns column in the example code below, but the results contain quotation marks: Houston-Alice-"salary". I do not want that; I want results like this: Houston-Alice-salary.
I've tried variations on "'{}'".format(data_col) to no avail: data_col, "{}".format(data_col), and '{}'.format(data_col) all return Houston-Alice-77321 (the salary value rather than the column name).
The column names are user supplied, so I need to use methods that prevent SQL injection attacks.
What should I try next?
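For what it's worth, here is what I think is going on: sql.Identifier() wraps the name in double quotes, and "'{}'".format(...) then wraps that already-quoted name in single quotes, so CONCAT() receives a string literal whose text still contains the double quotes. A minimal sketch (calling as_string() without a connection, as the example code below does):

from psycopg import sql

col = sql.Identifier('salary').as_string()  # Identifier() double-quotes the name
lit = "'{}'".format(col)                    # single quotes turn it into a SQL string literal
print(col)  # "salary"
print(lit)  # '"salary"'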
Here is my example code:
import logging

import psycopg
import pandas as pd

# logger used by execute_sql_get_dataframe() below
log = logging.getLogger(__name__)


def so_question(columns, group_by_columns, table):
    """
    example for SO
    """
    table = psycopg.sql.Composable.as_string(psycopg.sql.Identifier(table))
    columns = [psycopg.sql.Composable.as_string(psycopg.sql.Identifier(col)) for col in columns]
    group_by_columns = [psycopg.sql.Composable.as_string(psycopg.sql.Identifier(col)) for col in group_by_columns]
    for data_col in columns:
        group_by = ''
        # check if there are grouping columns
        if len(group_by_columns) > 0:
            concat_columns = " ,'-',".join(group_by_columns) + " ,'-'," + "'{}'".format(data_col)
            group_by_clause_group_by = " ,".join(group_by_columns) + " ," + data_col
        # CTE generation:
        sql_statement = f"""
            WITH sql_cte as (
                SELECT
                    CONCAT({concat_columns}) as concat_columns
                    ,{data_col} as value
                from {table}, unnest(array[{data_col}]) AS my_col
                group by
                    {group_by_clause_group_by}
            )
            SELECT * FROM sql_cte
            """
        return sql_statement
def execute_sql_get_dataframe(sql):
    """
    Creates (and closes) db connection, gets requested sql data, returns as Pandas DataFrame.

    Args:
        sql (string): sql query to execute.

    Returns:
        Pandas DataFrame of sql query results.
    """
    try:
        # create db connection, get connection and cursor object
        # (db_connection_cursor_object() is an existing helper that returns
        #  a dict with 'connection' and 'cursor' keys)
        db_connection_cursor = db_connection_cursor_object()
        # execute query
        db_connection_cursor['cursor'].execute(sql)
        # get results into tuples
        tuples_list = db_connection_cursor['cursor'].fetchall()
        # get column names: https://www.geeksforgeeks.org/get-column-names-from-postgresql-table-using-psycopg2/
        column_names = [desc[0] for desc in db_connection_cursor['cursor'].description]
        db_connection_cursor['connection'].close()
        # create df from results
        df_from_sql_query = pd.DataFrame(tuples_list, columns=column_names)
        return df_from_sql_query
    except Exception as exc:
        log.exception(f'sql_statement_to_execute:\n {sql}', exc_info=True)
        log.exception(msg=f'Exception: {exc}', exc_info=True)
data_columns = ['salary']
group_by_columns_in_order_of_grouping = ['city', 'name']
_sql = so_question(columns=data_columns,
                   group_by_columns=group_by_columns_in_order_of_grouping,
                   table='sample_table')
dataframe = execute_sql_get_dataframe(sql=_sql)
print(dataframe)
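For reference, this is roughly the statement so_question() generates for these inputs (whitespace aside). The single-quoted '"salary"' inside CONCAT() is the string literal that carries the unwanted double quotes into the output:

WITH sql_cte as (
    SELECT
        CONCAT("city" ,'-',"name" ,'-','"salary"') as concat_columns
        ,"salary" as value
    from "sample_table", unnest(array["salary"]) AS my_col
    group by
        "city" ,"name" ,"salary"
)
SELECT * FROM sql_cte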
Here's the code for data generation:
import psycopg
# Connect to the database
conn = psycopg.connect("dbname=mydatabase user=myuser password=mypassword")
cur = conn.cursor()
# Create the sample table
cur.execute("""
    CREATE TABLE sample_table (
        city VARCHAR(50),
        name VARCHAR(50),
        salary INTEGER
    )
""")
# Insert sample data into the table
cur.execute("""
    INSERT INTO sample_table (city, name, salary) VALUES
        ('New York', 'Alice', 70000),
        ('Los Angeles', 'Bob', 80000),
        ('Chicago', 'Charlie', 75000),
        ('Houston', 'Eve', 71758),
        ('Phoenix', 'Dave', 68000)
""")
# Commit the transaction
conn.commit()
# Close the cursor and connection
cur.close()
conn.close()
print("Sample table created and data inserted successfully.")
For future readers: Postgres has a built-in solution, REPLACE(). Here it is added to my CONCAT() line to accomplish the desired result:
REPLACE(CONCAT({concat_columns}), '"', '')
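In context, the CTE generation in so_question() becomes (only the CONCAT() line changes):

        sql_statement = f"""
            WITH sql_cte as (
                SELECT
                    REPLACE(CONCAT({concat_columns}), '"', '') as concat_columns
                    ,{data_col} as value
                from {table}, unnest(array[{data_col}]) AS my_col
                group by
                    {group_by_clause_group_by}
            )
            SELECT * FROM sql_cte
            """

One caveat: REPLACE() strips every double-quote character from the concatenated value. That is fine here, since the quotes only come from the quoted column name, but it would also remove any double quotes that happen to appear in the data itself.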