I am trying to predict choice probabilities with PROC MDC, using the following code:
/* Reformat data as necessary:
   reshape reg.crackers_hw5 from wide to long.  Each input row is treated as
   one purchase and is expanded into four output rows, one per brand. */
data brand_choice(keep=Brand Price Displ Feature Chosen PurchaseID);
    /* Wide-format columns; every array lists the brands in the same order
       (private, sunshine, keebler, nabisco) so index i addresses one brand. */
    array prices[4]      priceprivate pricesunshine pricekeebler pricenabisco;
    array displays[4]    displprivate displsunshine displkeebler displnabisco;
    array features[4]    featprivate featsunshine featkeebler featnabisco;
    array chosenbrand[4] private sunshine keebler nabisco;
    /* Brand labels written to the long-format Brand column. */
    array allbrands[4] $8 _temporary_ ('Private' 'Sunshine' 'Keebler' 'Nabisco');

    set reg.crackers_hw5;

    /* The input row number is used as the choice-set identifier. */
    PurchaseID = _N_;

    /* Emit one row per brand; the loop index is excluded by KEEP= above. */
    do i = 1 to dim(prices);
        Brand   = allbrands[i];
        Price   = prices[i];
        Displ   = displays[i];
        Feature = features[i];
        Chosen  = chosenbrand[i];
        output;
    end;
/* Close the step explicitly instead of relying on the next PROC statement
   to act as the step boundary. */
run;
/* Step 1: Prepare the Dataset
   Draw a 75% simple random sample from brand_choice with a fixed seed.
   OUTALL is requested so the output keeps every input row; the next DATA
   step tests a variable named Selected on that output.
   NOTE(review): no STRATA/CLUSTER/SAMPLINGUNIT statement is given, so the
   rows being sampled are individual brand alternatives, not whole
   PurchaseIDs -- confirm this is the intended sampling unit. */
proc surveyselect
        data     = brand_choice
        out      = brand_choice_sampled
        method   = srs
        samprate = 0.75
        seed     = 1
        outall;
run;
/* Build the training response choiceT in place: it copies Chosen on rows
   flagged Selected = 1 and is left missing on every other row. */
data brand_choice_sampled;
    set brand_choice_sampled;
    choiceT = .;                               /* default: not in the sample */
    if selected = 1 then choiceT = chosen;     /* sampled rows keep the outcome */
run;
/* Step 2: Estimate the Model
   Multinomial probit of choiceT on price, display, feature and the
   display-by-feature interaction, with choice sets identified by PurchaseID
   and declared to contain four alternatives each.
   NOTE(review): choiceT is missing on every unsampled row.  If PROC MDC
   excludes rows with a missing response, some PurchaseIDs would be left with
   fewer than four rows, which would be consistent with an NCHOICE= error --
   confirm against the PROC MDC missing-value rules. */
proc mdc data=brand_choice_sampled;
    class displ feature;
    model choiceT = price displ feature displ*feature
          / type    = mprobit
            nchoice = 4;
    id PurchaseID;
    /* Fix the parameters named Displ1, Feature1 and Displ1Feature1 at zero. */
    restrict Displ1 = 0, Feature1 = 0, Displ1Feature1 = 0;
    /* Write the predicted probabilities to pred_probs as predicted_prob. */
    output out=pred_probs p=predicted_prob;
run;
/* Step 3: View the Predicted Probabilities
   List predicted_prob from pred_probs, with PurchaseID as the row
   identifier column. */
proc print data=pred_probs;
    id  PurchaseID;
    var predicted_prob;
run;
However, I am getting this error:
ERROR: The NCHOICE= option is not allowed when the number of choices for each individual (ID) is not the same.
Here is what my dataset looks like after the reformat:
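I'm not sure exactly what happened, but this is the corrected code that produced the expected results. The only change to the statements is in the MODEL statement of PROC MDC: the response variable is now `chosen` instead of `choiceT`, and `type = clogit` replaces `type = mprobit`.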
/* c. Reformat data as necessary:
   reshape reg.crackers_hw5 from wide to long.  Each input row is treated as
   one purchase and is expanded into four output rows, one per brand. */
data brand_choice(keep=Brand Price Displ Feature Chosen PurchaseID);
    /* Wide-format columns; every array lists the brands in the same order
       (private, sunshine, keebler, nabisco) so index i addresses one brand. */
    array prices[4]      priceprivate pricesunshine pricekeebler pricenabisco;
    array displays[4]    displprivate displsunshine displkeebler displnabisco;
    array features[4]    featprivate featsunshine featkeebler featnabisco;
    array chosenbrand[4] private sunshine keebler nabisco;
    /* Brand labels written to the long-format Brand column. */
    array allbrands[4] $8 _temporary_ ('Private' 'Sunshine' 'Keebler' 'Nabisco');

    set reg.crackers_hw5;

    /* The input row number is used as the choice-set identifier. */
    PurchaseID = _N_;

    /* Emit one row per brand; the loop index is excluded by KEEP= above. */
    do i = 1 to dim(prices);
        Brand   = allbrands[i];
        Price   = prices[i];
        Displ   = displays[i];
        Feature = features[i];
        Chosen  = chosenbrand[i];
        output;
    end;
/* Close the step explicitly instead of relying on the next PROC statement
   to act as the step boundary. */
run;
/* Prepare the Dataset
   Draw a 75% simple random sample from brand_choice with a fixed seed.
   OUTALL is requested so the output keeps every input row; the next DATA
   step tests a variable named Selected on that output.
   NOTE(review): no STRATA/CLUSTER/SAMPLINGUNIT statement is given, so the
   rows being sampled are individual brand alternatives, not whole
   PurchaseIDs -- confirm this is the intended sampling unit. */
proc surveyselect
        data     = brand_choice
        out      = brand_choice_sampled
        method   = srs
        samprate = 0.75
        seed     = 1
        outall;
run;
/* Build choiceT in place: it copies Chosen on rows flagged Selected = 1 and
   is left missing on every other row.
   NOTE(review): the MODEL statement in the PROC MDC step of this program
   uses Chosen, not choiceT, so this variable does not enter the fit. */
data brand_choice_sampled;
    set brand_choice_sampled;
    choiceT = .;                               /* default: not in the sample */
    if selected = 1 then choiceT = chosen;     /* sampled rows keep the outcome */
run;
/* Estimate the Model
   Conditional logit of Chosen on price, display, feature and the
   display-by-feature interaction, with choice sets identified by PurchaseID
   and declared to contain four alternatives each.  The response here is
   Chosen, which the preceding DATA step does not set to missing. */
proc mdc data=brand_choice_sampled;
    class displ feature;
    model chosen = price displ feature displ*feature
          / type    = clogit
            nchoice = 4;
    id PurchaseID;
    /* Fix the parameters named Displ1, Feature1 and Displ1Feature1 at zero. */
    restrict Displ1 = 0, Feature1 = 0, Displ1Feature1 = 0;
    /* Write the predicted probabilities to pred_probs as predicted_prob. */
    output out=pred_probs p=predicted_prob;
run;
/* View the Predicted Probabilities
   List predicted_prob from pred_probs, with PurchaseID as the row
   identifier column. */
proc print data=pred_probs;
    id  PurchaseID;
    var predicted_prob;
run;