saswhere-clauseunionproc-sqlnot-exists

SAS Replace Row if value is greater than what exists and add any new rows


I have monthly datasets that all have a column MIA_CURR I want to set each month on top of each other so that there is only one row per unique_id. Adding in any new unique_ids that aren't in the first dataset and replacing the row if the column VAR1 is <= 0 in the first dataset.

The below is what I've tried but I can't quite seem to get it to work, hoping someone might be able to help:

PROC SQL;
CREATE TABLE DATA3 AS 
SELECT 
*
FROM 
DATA1
UNION
SELECT 
*
FROM 
DATA2 AS B
WHERE 
    NOT EXISTS
    ( SELECT *
    FROM DATA1 AS A
    WHERE A.UNIQUE_ID= B.UNIQUE_ID or a.VAR1>0
    )
    
;
QUIT;

EDIT: Here's an example:
Data1:

> Unique_ID  VAR1   VAR2    VAR3    VAR4    VAR5
  00001       0       3      w       1       0
  00002     -2.22     2      e       0       9
  00003       3       2      f       0       8
  00004       1       2      v       0       9

Data2:
Unique_ID    VAR1   VAR2    VAR3    VAR4    VAR5
  00001       1      3       w       1      0
  00005    -1.012    2       t       0      7
  00006       0      2       t       1      6
  00002       1      2       f       0      9

Data3:
Unique_ID    VAR1   VAR2    VAR3    VAR4    VAR5
  00001        1      3      w        1      0
  00002        1      2      f        0      9
  00003        3      2      f        0      8
  00004        1      2      v        0      9
  00005      -1.012   2      t        0      7
  00006        0      2      t        1      6

 

Solution

  • You can use the MODIFY statement to replace, append or delete records in an existing data set.

    Example:

    Each month is processed in turn against some master result data set.

    data _2023_01;
    input id var1-var5;
    format id z5.;
    datalines;
      00001       0       3      w       1       0
      00002     -2.22     2      e       0       9
      00003       3       2      f       0       8
      00004       1       2      v       0       9
    data _2023_02;
    input id var1-var5;
    format id z5.;
    datalines;
      00001       1      3       w       1      0
      00005    -1.012    2       t       0      7
      00006       0      2       t       1      6
      00002       1      2       f       0      9
    ;
    data _2023_03;
    input id var1-var5;
    format id z5.;
    datalines;
      00001        1      3      w        1      0
      00002        1      2      f        0      9
      00003        3      2      f        0      8
      00004        1      2      v        0      9
      00005      -1.012   2      t        0      7
      00006        0      2      t        1      6
    ;
    
    * establish main data set;
    data master (index=(id));
      set _2023_01;
    run;
    
    %macro modify_with(transaction_data);
      * change data according to rules;
      data master;
        modify 
          master 
          &transaction_data (rename=var1=new_var1);
        ;
        by id;
    
        select (_iorc_);
          when (%sysrc(_sok)) do; /* existing id */
            if var1 < 0 then do;
              putlog 'replacing ' id= var1= new_var1=;
              var1 = new_var1;
              replace;
            end;
          end;
          when (%sysrc(_dsenmr)) do; /* new id */
            putlog 'adding new' id=;
            var1 = new_var1;
            output;
            _error_ = 0;
          end;
          otherwise do;
            msg = sysmsg();
            putlog msg;
          end;
        end;
      run;
    %mend;
    
    %modify_with(_2023_02)
    %modify_with(_2023_03)