sasboxplotsgplot

SAS SGPLOT VBOX: Display Mean and Median on Boxplot


I am trying to make a boxplot using SGPLOT in SAS. I would like to use SGPLOT with the VBOX statement to mark the mean and median on the graph for each box.

Below is the data set I created as an example. Could someone kindly help me with this?

enter image description here

/* Graphics environment: reset every graphics option, then request a   */
/* white background, a border, and 12pt title / 10pt text heights.     */
/* NOTE(review): GOPTIONS governs SAS/GRAPH device-based output; ODS   */
/* Graphics procedures such as SGPLOT are normally configured with the */
/* ODS GRAPHICS statement instead -- confirm this statement is needed. */
goptions
   reset=all
   cback=white
   border
   htitle=12pt
   htext=10pt;

/* Sample data: 10 replicates for each XVAR in 1, 3, 5, 7, 9 (50 rows). */
/* YVAR is a uniform random number scaled by 100. A seed of 0 means the */
/* values differ from one run to the next.                              */
data one;
   do rep = 1 to 10;
      do xvar = 1 to 9 by 2;
         yvar = 100 * ranuni(0);
         output;
      end;
   end;
   drop rep;   /* the replicate counter is not needed in the output */
run;

/* Order the observations by XVAR so the BY statements below are valid. */
proc sort data=one;
   by xvar;
run;

/* Mean and median of YVAR within each XVAR group, written to WORK.STAT */
/* (NOPRINT suppresses the listing output).                             */
proc univariate data=one noprint;
   by xvar;
   var yvar;
   output out=stat
          mean=mean
          median=median;
run;

/* Attach each group's mean and median to every observation of that */
/* group (one STAT row matches many ONE rows per XVAR value).       */
data all;
   merge one stat;
   by xvar;
run;

Solution

  • Use VBOX for box plot, SCATTER for mean/median.

    /*--Compute the mean and median of cholesterol for each cause of death--*/
    /* With a CLASS variable, PROC MEANS also writes an overall summary row */
    /* (_TYPE_ = 0); the WHERE= filter keeps only the per-DeathCause rows.  */
    proc means data=sashelp.heart;
      class deathcause;
      var cholesterol;
      output out=heart(where=(_type_ > 0) keep=deathcause mean median _type_)
        mean   = mean
        median = median;
    run;

    /*--Append the summary rows to the raw data--*/
    /* This is a concatenation, not a match-merge: the raw rows carry       */
    /* CHOLESTEROL (MEAN/MEDIAN missing) and the summary rows carry         */
    /* MEAN/MEDIAN (CHOLESTEROL missing), so VBOX and SCATTER each plot     */
    /* only their own rows.                                                 */
    data heart2;
      keep deathcause mean median cholesterol;
      set sashelp.heart heart;
    run;

    /* Sanity check: list only the appended summary rows instead of the    */
    /* entire data set, which also holds thousands of raw observations.    */
    proc print data=heart2;
      where not missing(mean);
    run;

    /*--Box plot with group colors; mean and median overlaid as markers--*/
    ods graphics / reset ANTIALIASMAX=5300 width=5in height=3in imagename='Box_Group_Multi_Connect';
    title 'Cholesterol by Cause of Death';
    proc sgplot data=heart2 noautolegend noborder;
      vbox cholesterol / category=deathcause group=deathcause;
      scatter x=deathcause y=mean / name='mean' legendlabel='Mean' markerattrs=(color=green);
      scatter x=deathcause y=median / name='median' legendlabel='Median' markerattrs=(color=red);
      /* The automatic legend is off (NOAUTOLEGEND); this explicit legend */
      /* lists only the two SCATTER overlays, by their NAME= values.      */
      keylegend "mean" "median" / linelength=32 location=inside across=1 position=topright;
      xaxis display=(nolabel);
    run;
    

    EDIT: Within SGPLOT's VBOX statement, you can also plot the median as a line and the mean as a point on the box plot, without any manual calculations ahead of time. This is available as of SAS 9.4M5.

    /*--Box plot where VBOX itself shows the mean and median (no pre-computed data)--*/
    ods graphics / reset ANTIALIASMAX=5300 width=5in height=3in imagename='Box_Group';
    title 'Cholesterol by Cause of Death';
    proc sgplot data=sashelp.heart noborder;
      /* DISPLAYSTATS= requests the median and mean statistics for each box. */
      /* Mean in green and median in red: the same color coding used by the  */
      /* SCATTER-overlay version of this plot, so the two graphs agree.      */
      vbox cholesterol / category=deathcause
                        displaystats=(median mean)
                        meanattrs=(color=green)
                        medianattrs=(color=red);
      /* Left disabled on purpose; remove the leading asterisk to hide the */
      /* X-axis label.                                                     */
      *xaxis display=(nolabel);
    run;