I'm attempting to generate an automated report that combines counts, row percentages, and chi-squared p-values from two-way proc freq output for a series of variables.
I'd like the counts and percentages for Variable A to be displayed under separate headers for each category of Variable B.
I'm almost there, but running the following test code on the sashelp.cars
dataset produces a report that has offset rows.
Is it possible to consolidate the rows by Cylinder values so I don't have so many empty cells in the table?
proc freq data=sashelp.cars;
tables origin*cylinders / chisq totpct outpct list out=freqpct(keep=cylinders origin count pct_row);
output out=chisq(keep=N P_PCHI) all;
run;
data freqpct;
set freqpct;
var=1;
run;
data chisq;
set chisq;
var=1;
run;
proc sql;
create table chisq_freqpct
as select *
from freqpct a
inner join
chisq b
on a.var=b.var;
quit;
proc report data=chisq_freqpct;
column cylinders origin,(count pct_row) N P_PCHI;
define cylinders / display;
define origin / across;
define count / display;
define pct_row / display;
define N / group;
define P_PCHI / group;
run;
Use GROUP for cylinder and MAX or MIN for N and P_PCHI. Only attach the N and P_CHI values to the first observation. Which means you either need to exclude the missing values of CYLINDERS and ORIGIN in the PROC FREQ step or add the MISSING keyword to the PROC REPORT step.
proc freq data=sashelp.cars noprint;
* where 0=cmiss(origin,cylinders);
tables origin*cylinders / chisq outpct out=freqpct(keep=cylinders origin count pct_row);
output out=chisq(keep=N P_PCHI ) all;
run;
data chisq_freqpct;
if _n_ = 1 then set chisq;
else call missing(of _all_);
set freqpct;
run;
options missing=' ';
proc report data=chisq_freqpct split=' ' missing;
column cylinders origin,(count pct_row) n p_pchi;
define cylinders / group ;
define origin / across;
define n / max;
define p_pchi / max;
run;
options missing='.';