pythonpython-3.xpostgresqlpsycopg3

How can I concatenate columns without quotation marks in the results while avoiding SQL injection attacks using psycopg?


I want to concatenate with results like concat_columns here:

Results formatted properly

In the example code below the results have quotation marks: Houston-Alice-"salary" I do not want this, I want the results like this: Houston-Alice-salary

I've tried variations on the "'{}'".format(data_col) to no avail. data_col, "{}".format(data_col), and '{}'.format(data_col) all return Houston-Alice-77321.

The column names are user supplied so I need to use methods that prevent SQL injection attacks.

What should I try next?

Here is my example code:

import psycopg

import pandas as pd
    
def so_question(columns, group_by_columns, table):
                """
                example for SO
                """
                table = psycopg.sql.Composable.as_string(psycopg.sql.Identifier(table))
                columns = [psycopg.sql.Composable.as_string(psycopg.sql.Identifier(col)) for col in columns]
                group_by_columns = [psycopg.sql.Composable.as_string(psycopg.sql.Identifier(col)) for col in group_by_columns]
            
                for data_col in columns:
                    group_by = ''
                    # check if there are grouping columns
                    if len(group_by_columns) > 0:
                        concat_columns = " ,'-',".join(group_by_columns) + " ,'-'," + "'{}'".format(data_col)
                        group_by_clause_group_by = " ,".join(group_by_columns) + " ," + data_col
                    # CTE generation:
                    sql_statement = f"""
                                WITH sql_cte as (
                                    SELECT
                                            CONCAT({concat_columns}) as concat_columns 
                                            ,{data_col} as value
            
                                    from {table}, unnest(array[{data_col}]) AS my_col
                                    group by 
                                            {group_by_clause_group_by}
                                            )
                                    SELECT * FROM sql_cte
                                    """
                return sql_statement
    
    def execute_sql_get_dataframe(sql):
            """
            Creates (and closes) db connection, gets requested sql data , returns as Pandas DataFrame.
            Args:
                    sql (string): sql query to execute.
        
            Returns:
                Pandas DataFrame of sql query results.
            """
            try:
                # print(sql_statement_to_execute)
                # create db connection, get connection and cursor object
                db_connection_cursor = db_connection_cursor_object()
                # execute query
                db_connection_cursor['cursor'].execute(sql)
                # get results into tuples
                tuples_list = db_connection_cursor['cursor'].fetchall()
                # get column names: https://www.geeksforgeeks.org/get-column-names-from-postgresql-table-using-psycopg2/
                column_names = [desc[0] for desc in db_connection_cursor['cursor'].description]
                db_connection_cursor['connection'].close()
                # create df from results
                df_from_sql_query = pd.DataFrame(tuples_list, columns=column_names)
                return df_from_sql_query
            except Exception as exc:
                log.exception(f'sql_statement_to_execute:\n {sql}', exc_info=True)
                log.exception(msg=f'Exception: {exc}', exc_info=True)
        
    
    
    data_columns = ['salary']
    group_by_columns_in_order_of_grouping = ['city', 'name']
    
    _sql = so_question(columns=data_columns,
                                  group_by_columns=group_by_columns_in_order_of_grouping,
                                  table='random_data')
    dataframe = execute_sql_get_dataframe(sql=_sql)
    print(dataframe)

Here's the code for data generation:

import psycopg

# Connect to the database
conn = psycopg.connect("dbname=mydatabase user=myuser password=mypassword")
cur = conn.cursor()

# Create the sample table
cur.execute("""
    CREATE TABLE sample_table (
        city VARCHAR(50),
        name VARCHAR(50),
        salary INTEGER
    )
""")

# Insert sample data into the table
cur.execute("""
    INSERT INTO sample_table (city, name, salary) VALUES
    ('New York', 'Alice', 70000),
    ('Los Angeles', 'Bob', 80000),
    ('Chicago', 'Charlie', 75000),
    ('Houston', 'Eve', 71758),
    ('Phoenix', 'Dave', 68000)
""")

# Commit the transaction
conn.commit()

# Close the cursor and connection
cur.close()
conn.close()

print("Sample table created and data inserted successfully.")

Solution

  • For future readers, Postgres has a built in solution, REPLACE().

    Here's it added to my CONCAT() line to accomplish the desired result:

    REPLACE(CONCAT({concat_columns}), '"', '')