sqlsasrdbms

How do I create columns that add a tag if it matches with a keyword from a different dataset?


I have 2 datasets: 1 with a list of general goods and another with a list of transactions:

Keyword Goods
Soap A Soap
Soap B Soap
Shampoo Shampoo
ID Date Txn
1 1/22 Soap A 100 ml
1 1/23 Soap A 50 ml
2 1/24 Soap B 100 ml
2 1/24 Shampoo 50 ml
3 1/24 Juice 100 g

I want to create something like below that would check if the txn column matches the keyword column and create a tagging per goods for each ID (e.g. if an ID had any txns with the keyword 'Soap A' or 'Soap B', the column Soap would be tagged as 1 and 0 if not)

ID Soap Shampoo
1 1 0
2 1 1
3 0 0

I know I can make this happen by coding manually using if then statements but I have a long list of goods so I want to see if there's a more efficient way to do this through do loops.


Solution

  • Here's one way to do this:

    
    *create fake data;
    data lookup;
    infile cards dlm=',';
    input Keyword $ Goods $;
    cards;
    Soap A,  Soap
    Soap B,  Soap
    Shampoo, Shampoo
    ;
    run;
    
    data rawData;
    infile cards dlm=',' truncover;
    input ID  $ _Date $    Txn  $25.;
    informat id $8. _date $8. txn $25.;
    date = mdy(input(scan(_date, 1, "/"), 8.), 1, input(scan(_date, 2, "/"), 8.));
    format date yymmn6.;
    drop _date;
    cards;
    1,   1/22,    Soap A 100 ml
    1,   1/23,    Soap A 50 ml
    2,   1/24,    Soap B 100 ml
    2,   1/24,    Shampoo 50 ml
    3,   1/24,    Juice 100 g
    ;
    run;
    
    *get the dimensions of the number of lookup terms;
    proc sql noprint;
    select count(*) into :num_search_terms from lookup;
    quit;
    
    %put &num_search_terms;
    
    *lookup values;
    data search;
    array _lookup(&num_search_terms., 2) $ _temporary_;
    
    /*2*/
    *load array into memory;
       if _n_ = 1 then do j=1 to &num_search_terms.;
       set lookup;
       _lookup(j,1) = keyword;
       _lookup(j,2) = Goods;
       end;
     
     set rawData;
     do i=1 to &num_search_terms.;
        if find(txn, _lookup(i, 1), 'it')>0 then term=_lookup(i, 2);
     end;
     
     if not missing(term) then Value=1;
     
     drop i j keyword;
    run;
    
    *summarize to one record per id, term;
    proc sql;
    create table data2transpose as
    select id, term, max(value) as Value
    from search
    group by id, term
    order by 1;
    quit;
    
    *flip to desired format;
    proc transpose data=data2transpose out=want;
    by id;
    var value;
    id term;
    run;