sqlpostgresqlpostgres-10

insert data from staging table to 2 tables by reusing postgres index or bigserial


I have 3 tables :

CREATE TABLE stage
(
  a1 text,
  b2 text,
  c3 text,
  d4 text,
  e5 text
);

CREATE TABLE main
(
  id bigserial PRIMARY KEY,
  a1 text,
  b2 text,
  c3 text
);

CREATE TABLE secondary (
  id bigserial PRIMARY KEY,
  mainid bigint,
  d4 text,
  e5 text,
  CONSTRAINT secondary_fkey FOREIGN KEY(mainid) REFERENCES main(id)
);

I want to insert data from stage into main and secondary at once but I'm not quite sure how to do that by reusing generated bigserial in main. I'm trying a with query but I'm getting more rows (exponentially) in secondary than expected.

dbfiddle

WITH tmp AS (
  INSERT INTO
    main (a1, b2, c3)
  SELECT
    a1,
    b2,
    c3
  FROM
    stage RETURNING id
)
INSERT INTO
  secondary (mainid, d4, e5)
SELECT
  tmp.id,
  stage.d4,
  stage.e5
FROM
  tmp,
  stage;

Solution

  • Your problem is the cross join you create in the final INSERT statement with the FROM tmp, stage;. If you have 10 rows in the stage table, this will generate 100 rows rathe than the 10 you want.

    If (a1, b2, c3) uniquely identity a row in stage you could use them for a proper join condition:

    WITH tmp AS (
      INSERT INTO main (a1, b2, c3)
      SELECT a1, b2, c3
      FROM stage 
      RETURNING *
    )
    INSERT INTO secondary (mainid, d4, e5)
    SELECT tmp.id,
           stage.d4,
           stage.e5
    FROM tmp 
      JOIN stage 
        on tmp.a1 = stage.a1 
       and tmp.b2 = stage.b2 
       and tmp.c3 = stage.c3;
    

    If that is not feasible (because there are duplicates) you can generate the new IDs for the main table before the insert using nextval()

    with stage_with_mainid as (
      select nextval(pg_get_serial_sequence('main', 'id')) as mainid,
             a1, b2, c3, d4, e5
      from stage       
    ), insert_main as (
      insert into main (id, a1, b2, c3) --<< this provides the generated new ID explictely
      select mainid, a1, b2, c3
      from stage_with_mainid
    )
    insert into secondary (mainid, d4, e5)
    select mainid, d4, e5
    from stage_with_mainid;