sqlpandasoracle-databasemergecx-oracle

cx_oracle MERGE USING when matched, when not matched and a Pandas dataframe


I have searched stackoverflow, 'ASK TOM', and oracle forums, but can't find a solution to my problem. I am trying to either update or insert into a table based on whether or not the keys exist in the table. I keep getting 'right parenthesis missing' even after removing all parenthesis. Below is the code: I read in a csv file to a pandas data frame and then change the np.nan to ''.

    import pandas as pd
    import numpy as np
    import cx_Oracle

    CONNECT_DEV = '/@HISTORICAL_DEV'

    master_airtempclimo = pd.read_csv('/home/milstedl/JMCLIM/master_airtempclimo_FINAL_PYTHON.csv')
    master_airtempclimo.replace(np.nan, '', regex=True, inplace=True)

    dev_connect = cx_Oracle.connect(CONNECT_DEV)
    dev_cursor = dev_connect.cursor()

    sql = '''merge into jm_admin.jmm_airtempclimo a 
    using (select LOCATIONID, PERIODID, MONTH, NAME, ORGANIZATIONID, VERSION
       from dual) d
    on (a.LOCATIONID = d.LOCATIONID, a.PERIODID = d.PERIODID, a.MONTH = d.MONTH, a.NAME = d.NAME, 
    a.ORGANIZATIONID = a.ORGANIZATIONID, a.VERSION = d.VERSION)
    when matched then update set a.TEMPERATURESAMPLESIZE = :TEMPERATURESAMPLESIZE, a.DENSITYMEAN = :DENSITYMEAN, 
     a.TEMPERATUREDEWPOINTSAMPLESIZE = :TEMPERATUREDEWPOINTSAMPLESIZE, a.HUMIDITYSAMPLESIZE = :HUMIDITYSAMPLESIZE,
     a.TEMPERATUREMEAN = :TEMPERATUREMEAN, a.TEMPERATUREDEWPOINTMEAN = :TEMPERATUREDEWPOINTMEAN,
     a.PRESSUREVAPORMEAN = :PRESSUREVAPORMEAN, a.TEMPERATURESTANDARDDEVIATION = :TEMPERATURESTANDARDDEVIATION,
     a.TEMPERATUREDEWPOINTSTDDEV = :TEMPERATUREDEWPOINTSTDDEV, a.DENSITYSTANDARDDEVIATION = :DENSITYSTANDARDDEVIATION,
     a.TEMPERATUREMAXIMUM = :TEMPERATUREMAXIMUM, a.TEMPERATUREDEWPOINTMAXIMUM = :TEMPERATUREDEWPOINTMAXIMUM,
     a.HUMIDITYRELATIVEMAXIMUM = :HUMIDITYRELATIVEMAXIMUM, a.HUMIDITYABSOLUTEMAXIMUM = :HUMIDITYABSOLUTEMAXIMUM,
     a.HUMIDITYSPECIFICMAXIMUM = :HUMIDITYSPECIFICMAXIMUM, a.TEMPERATUREMINIMUM = :TEMPERATUREMINIMUM,
     a.TEMPERATUREDEWPOINTMINIMUM = :TEMPERATUREDEWPOINTMINIMUM, a.HUMIDITYRELATIVEMINIMUM = :HUMIDITYRELATIVEMINIMUM,
     a.HUMIDITYABSOLUTEMINIMUM = :HUMIDITYABSOLUTEMINIMUM, a.HUMIDITYSPECIFICMINIMUM = :HUMIDITYSPECIFICMINIMUM,
     a.TEMPERATUREMAXIMUMMEAN = :TEMPERATUREMAXIMUMMEAN, a.TEMPERATUREMINIMUMMEAN = :TEMPERATUREMINIMUMMEAN,
     a.HUMIDITYRELATIVEMAXIMUMMEAN = :HUMIDITYRELATIVEMAXIMUMMEAN, a.HUMIDITYRELATIVEMINIMUMMEAN = :HUMIDITYRELATIVEMINIMUMMEAN,
     a.TEMPERATUREMAXIMUMRANGE = :TEMPERATUREMAXIMUMRANGE, a.TEMPERATUREMINIMUMRANGE = :TEMPERATUREMINIMUMRANGE
    when not matched then insert (LOCATIONID, PERIODID, MONTH, NAME, ORGANIZATIONID, VERSION, TEMPERATURESAMPLESIZE, DENSITYMEAN, TEMPERATUREDEWPOINTSAMPLESIZE, HUMIDITYSAMPLESIZE, TEMPERATUREMEAN, TEMPERATUREDEWPOINTMEAN, PRESSUREVAPORMEAN, TEMPERATURESTANDARDDEVIATION, TEMPERATUREDEWPOINTSTDDEV, DENSITYSTANDARDDEVIATION, TEMPERATUREMAXIMUM, TEMPERATUREDEWPOINTMAXIMUM, HUMIDITYRELATIVEMAXIMUM, HUMIDITYABSOLUTEMAXIMUM, HUMIDITYSPECIFICMAXIMUM, TEMPERATUREMINIMUM, TEMPERATUREDEWPOINTMINIMUM, HUMIDITYRELATIVEMINIMUM, HUMIDITYABSOLUTEMINIMUM, HUMIDITYSPECIFICMINIMUM, TEMPERATUREMAXIMUMMEAN, TEMPERATUREMINIMUMMEAN, HUMIDITYRELATIVEMAXIMUMMEAN, HUMIDITYRELATIVEMINIMUMMEAN, TEMPERATUREMAXIMUMRANGE, TEMPERATUREMINIMUMRANGE)
    values (:LOCATIONID, :PERIODID, :MONTH, :NAME, :ORGANIZATIONID, :VERSION, :TEMPERATURESAMPLESIZE, :DENSITYMEAN, :TEMPERATUREDEWPOINTSAMPLESIZE, :HUMIDITYSAMPLESIZE, :TEMPERATUREMEAN, :TEMPERATUREDEWPOINTMEAN, :PRESSUREVAPORMEAN, :TEMPERATURESTANDARDDEVIATION, :TEMPERATUREDEWPOINTSTDDEV, :DENSITYSTANDARDDEVIATION, :TEMPERATUREMAXIMUM, :TEMPERATUREDEWPOINTMAXIMUM, :HUMIDITYRELATIVEMAXIMUM, :HUMIDITYABSOLUTEMAXIMUM, :HUMIDITYSPECIFICMAXIMUM, :TEMPERATUREMINIMUM, :TEMPERATUREDEWPOINTMINIMUM, :HUMIDITYRELATIVEMINIMUM, :HUMIDITYABSOLUTEMINIMUM, :HUMIDITYSPECIFICMINIMUM, :TEMPERATUREMAXIMUMMEAN, :TEMPERATUREMINIMUMMEAN, :HUMIDITYRELATIVEMAXIMUMMEAN, :HUMIDITYRELATIVEMINIMUMMEAN, :TEMPERATUREMAXIMUMRANGE, :TEMPERATUREMINIMUMRANGE)
'''

    try:
        for index, row in master_airtempclimo.iterrows():
            dev_cursor.execute(sql, {"LOCATIONID": row['LOCATIONID'], "PERIODID": row['PERIODID'],
                                     "MONTH": row['MONTH'], "NAME": row['NAME'], 'ORGANIZATIONID': row['ORGANIZATIONID'],
                                 'VERSION': row['VERSION'], 'TEMPERATURESAMPLESIZE': row['TEMPERATURESAMPLESIZE'],
                                 'DENSITYMEAN': row['DENSITYMEAN'], 'TEMPERATUREDEWPOINTSAMPLESIZE': row['TEMPERATUREDEWPOINTSAMPLESIZE'],
                                 'HUMIDITYSAMPLESIZE': row['HUMIDITYSAMPLESIZE'], 'TEMPERATUREMEAN': row['TEMPERATUREMEAN'],
                                 'TEMPERATUREDEWPOINTMEAN': row['TEMPERATUREDEWPOINTMEAN'], 'PRESSUREVAPORMEAN': row['PRESSUREVAPORMEAN'],
                                 'TEMPERATURESTANDARDDEVIATION': row['TEMPERATURESTANDARDDEVIATION'],
                                 'TEMPERATUREDEWPOINTSTDDEV': row['TEMPERATUREDEWPOINTSTDDEV'],
                                 'DENSITYSTANDARDDEVIATION': row['DENSITYSTANDARDDEVIATION'], 'TEMPERATUREMAXIMUM': row['TEMPERATUREMAXIMUM'],
                                 'TEMPERATUREDEWPOINTMAXIMUM': row['TEMPERATUREDEWPOINTMAXIMUM'],
                                 'HUMIDITYRELATIVEMAXIMUM': row['HUMIDITYRELATIVEMAXIMUM'],
                                 'HUMIDITYABSOLUTEMAXIMUM': row['HUMIDITYABSOLUTEMAXIMUM'],
                                 'HUMIDITYSPECIFICMAXIMUM': row['HUMIDITYSPECIFICMAXIMUM'], 'TEMPERATUREMINIMUM': row['TEMPERATUREMINIMUM'],
                                 'TEMPERATUREDEWPOINTMINIMUM': row['TEMPERATUREDEWPOINTMINIMUM'],
                                 'HUMIDITYRELATIVEMINIMUM': row['HUMIDITYRELATIVEMINIMUM'],
                                 'HUMIDITYABSOLUTEMINIMUM': row['HUMIDITYABSOLUTEMINIMUM'],
                                 'HUMIDITYSPECIFICMINIMUM': row['HUMIDITYSPECIFICMINIMUM'],
                                 'TEMPERATUREMAXIMUMMEAN': row['TEMPERATUREMAXIMUMMEAN'], 'TEMPERATUREMINIMUMMEAN': row['TEMPERATUREMINIMUMMEAN'],
                                 'HUMIDITYRELATIVEMAXIMUMMEAN': row['HUMIDITYRELATIVEMAXIMUMMEAN'],
                                 'HUMIDITYRELATIVEMINIMUMMEAN': row['HUMIDITYRELATIVEMINIMUMMEAN'],
                                 'TEMPERATUREMAXIMUMRANGE': row['TEMPERATUREMAXIMUMRANGE'],
                                 'TEMPERATUREMINIMUMRANGE': row['TEMPERATUREMINIMUMRANGE']
                                 })
    except Exception as exception:
        print('Error, ', exception)
        exit()
    else:
        dev_connect.commit()

I am sure that I did something wrong in the sql but don't know what. Any help would be greatly appreciated.


Solution

  • You have multiple issues:

    1. The DUAL table has a singe column DUMMY it does not have the columns LOCATIONID, PERIODID, MONTH, NAME, ORGANIZATIONID, VERSION - if you want to pass those as bind variables then you will need to use bind variables in the USING clause and not in the WHEN clauses.
    2. As pointed out by @PaulW, the ON clause should have AND between the expressions not ,.
    3. You have a.ORGANIZATIONID = a.ORGANIZATIONID in the ON clause, the second a. should be d.
    4. While not strictly necessary, formatting your query neatly will make it much easier to read and to logically organise the different sections of the query which, in turn, makes it simpler to debug (i.e. I didn't spot #3 until I reformatted the ON clause and then it leapt out the the right-hand side of the comparisons were not all d.).

    Only using a few of the columns (but you should get the general idea to move the bind variables to the USING clause):

    merge into jm_admin.jmm_airtempclimo a 
    using (
      select :LOCATIONID AS locationid,
             :PERIODID AS periodid,
             :MONTH AS month,
             :NAME AS name,
             :ORGANIZATIONID AS organizationid,
             :VERSION AS version,
             :TEMPERATURESAMPLESIZE AS temperaturesamplesize,
             :DENSITYMEAN AS denistymean,
             :TEMPERATUREDEWPOINTSAMPLESIZE AS temperaturedewpointsamplesize
      from   dual
    ) d
    on (
          a.LOCATIONID     = d.LOCATIONID
      AND a.PERIODID       = d.PERIODID
      AND a.MONTH          = d.MONTH
      AND a.NAME           = d.NAME 
      AND a.ORGANIZATIONID = d.ORGANIZATIONID
      AND a.VERSION        = d.VERSION
    )
    when matched then
      update set
        a.TEMPERATURESAMPLESIZE         = d.TEMPERATURESAMPLESIZE,
        a.DENSITYMEAN                   = d.DENSITYMEAN, 
        a.TEMPERATUREDEWPOINTSAMPLESIZE = d.TEMPERATUREDEWPOINTSAMPLESIZE
    when not matched then
       insert (
         LOCATIONID,
         PERIODID,
         MONTH,
         NAME,
         ORGANIZATIONID,
         VERSION,
         TEMPERATURESAMPLESIZE,
         DENSITYMEAN,
         TEMPERATUREDEWPOINTSAMPLESIZE
       ) values (
         d.LOCATIONID,
         d.PERIODID,
         d.MONTH,
         d.NAME,
         d.ORGANIZATIONID,
         d.VERSION,
         d.TEMPERATURESAMPLESIZE,
         d.DENSITYMEAN,
         d.TEMPERATUREDEWPOINTSAMPLESIZE
       )
    

    Alternatively, if you want to use named bind variables through-out then use them through-out and ignore the USING clause (Note: this will not work if you are using positional bind variables, rather than named bind variables, as you would need to pass in variables multiple times when you execute the query corresponding to the position they are used in the query):

    merge into jm_admin.jmm_airtempclimo a 
    using dual
    on (
          a.LOCATIONID     = :LOCATIONID
      AND a.PERIODID       = :PERIODID
      AND a.MONTH          = :MONTH
      AND a.NAME           = :NAME 
      AND a.ORGANIZATIONID = :ORGANIZATIONID
      AND a.VERSION        = :VERSION
    )
    when matched then
      update set
        a.TEMPERATURESAMPLESIZE         = :TEMPERATURESAMPLESIZE,
        a.DENSITYMEAN                   = :DENSITYMEAN, 
        a.TEMPERATUREDEWPOINTSAMPLESIZE = :TEMPERATUREDEWPOINTSAMPLESIZE
    when not matched then
       insert (
         LOCATIONID,
         PERIODID,
         MONTH,
         NAME,
         ORGANIZATIONID,
         VERSION,
         TEMPERATURESAMPLESIZE,
         DENSITYMEAN,
         TEMPERATUREDEWPOINTSAMPLESIZE
       ) values (
         :LOCATIONID,
         :PERIODID,
         :MONTH,
         :NAME,
         :ORGANIZATIONID,
         :VERSION,
         :TEMPERATURESAMPLESIZE,
         :DENSITYMEAN,
         :TEMPERATUREDEWPOINTSAMPLESIZE
       )