python · postgresql · psycopg2

Create a table with multiple columns and values in PostgreSQL


I have multiple feather files, each with about 120 columns and about 300,000 values. I wonder if there is a faster way to create a table, aside from creating the columns manually for each file. Here is the code that I'm trying to build:

path = "E:\\file_path"
list = glob.glob(os.path.join(path, '*.feather'))

for file in list:
    print(f"File: {file}")
    df = pd.read_feather(file)
    table_name = {file}

    columns = df.dtypes.to_dict()
    columns_sql = []

    def map_dtype_to_pg(dtype): # this function is for changing to the type input in sql
        if pd.api.types.is_integer_dtype(dtype):
            return "INTEGER"
        elif pd.api.types.is_float_dtype(dtype):
            return "FLOAT"
        elif pd.api.types.is_bool_dtype(dtype):
            return "BOOLEAN"
        elif pd.api.types.is_datetime64_any_dtype(dtype):
            return "TIMESTAMP"
        else:
            return "TEXT"

    for col, dtype in columns.items(): # the column's name and type will be recorded here
        pg_type = map_dtype_to_pg(dtype)
        columns_sql.append(f'"{col}" {pg_type}')

    create_table_sql = f"CREATE TABLE {table_name} (  {' '.join(columns_sql)}\n);" # this will be the query to create multiple table from the existing file's name and value

But I don't know how to efficiently write the query that inserts the values according to the column names.


Solution

  • pandas provides a way to save DataFrames to SQL.

    You should be able to do this using something like:

    import glob
    import os
    
    import pandas as pd
    from sqlalchemy import create_engine
    
    path = "E:\\file_path"
    file_list = glob.glob(os.path.join(path, '*.feather'))
    
    # create the engine once and reuse it for every file
    engine = create_engine("postgresql+psycopg2://user:pass@localhost/database")
    
    for file in file_list:
        print(f"File: {file}")
        df = pd.read_feather(file)
        # use the file name (without directory or extension) as the table name
        table_name = os.path.splitext(os.path.basename(file))[0]
    
        # to_sql creates the table (inferring column types from the dtypes)
        # and inserts all rows; index=False skips the DataFrame's index column
        df.to_sql(name=table_name, con=engine, if_exists="replace", index=False)
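
  • For files of this size, to_sql's default one-INSERT-per-row behaviour can
    be slow. to_sql accepts method="multi" and a chunksize argument to batch
    rows into fewer INSERT statements. For the fastest load, PostgreSQL's COPY
    can be used instead; below is a minimal sketch using psycopg2's
    copy_expert through the SQLAlchemy engine. The helper name
    copy_from_dataframe is just for illustration, and it assumes the target
    table already exists (for example, created by your CREATE TABLE statement
    or by an earlier to_sql call):

    import io

    def copy_from_dataframe(engine, df, table_name):
        # Serialize the DataFrame to CSV in memory and stream it into
        # PostgreSQL with COPY, which is much faster than per-row INSERTs.
        buf = io.StringIO()
        df.to_csv(buf, index=False, header=False)
        buf.seek(0)

        raw = engine.raw_connection()  # underlying psycopg2 connection
        try:
            cols = ", ".join(f'"{c}"' for c in df.columns)
            with raw.cursor() as cur:
                cur.copy_expert(
                    f'COPY "{table_name}" ({cols}) FROM STDIN WITH (FORMAT csv)',
                    buf,
                )
            raw.commit()
        finally:
            raw.close()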