I have 2 datasets: 1 with a list of general goods and another with a list of transactions:
Keyword | Goods |
---|---|
Soap A | Soap |
Soap B | Soap |
Shampoo | Shampoo |
ID | Date | Txn |
---|---|---|
1 | 1/22 | Soap A 100 ml |
1 | 1/23 | Soap A 50 ml |
2 | 1/24 | Soap B 100 ml |
2 | 1/24 | Shampoo 50 ml |
3 | 1/24 | Juice 100 g |
I want to create something like below that would check if the txn column matches the keyword column and create a tagging per goods for each ID (e.g. if an ID had any txns with the keyword 'Soap A' or 'Soap B', the column Soap would be tagged as 1 and 0 if not)
ID | Soap | Shampoo |
---|---|---|
1 | 1 | 0 |
2 | 1 | 1 |
3 | 0 | 0 |
I know I can make this happen by coding manually using if then statements but I have a long list of goods so I want to see if there's a more efficient way to do this through do loops.
Here's one way to do this:
*create fake data;
data lookup;
infile cards dlm=',';
input Keyword $ Goods $;
cards;
Soap A, Soap
Soap B, Soap
Shampoo, Shampoo
;
run;
data rawData;
infile cards dlm=',' truncover;
input ID $ _Date $ Txn $25.;
informat id $8. _date $8. txn $25.;
date = mdy(input(scan(_date, 1, "/"), 8.), 1, input(scan(_date, 2, "/"), 8.));
format date yymmn6.;
drop _date;
cards;
1, 1/22, Soap A 100 ml
1, 1/23, Soap A 50 ml
2, 1/24, Soap B 100 ml
2, 1/24, Shampoo 50 ml
3, 1/24, Juice 100 g
;
run;
*get the dimensions of the number of lookup terms;
proc sql noprint;
select count(*) into :num_search_terms from lookup;
quit;
%put &num_search_terms;
*lookup values;
data search;
array _lookup(&num_search_terms., 2) $ _temporary_;
/*2*/
*load array into memory;
if _n_ = 1 then do j=1 to &num_search_terms.;
set lookup;
_lookup(j,1) = keyword;
_lookup(j,2) = Goods;
end;
set rawData;
do i=1 to &num_search_terms.;
if find(txn, _lookup(i, 1), 'it')>0 then term=_lookup(i, 2);
end;
if not missing(term) then Value=1;
drop i j keyword;
run;
*summarize to one record per id, term;
proc sql;
create table data2transpose as
select id, term, max(value) as Value
from search
group by id, term
order by 1;
quit;
*flip to desired format;
proc transpose data=data2transpose out=want;
by id;
var value;
id term;
run;