sas

How can I use PROC MDC to predict the choice probabilities for a test sample? - nchoice error


I am trying to predict choice probabilities through proc mdc using the following code:

/*  Reformat data as necessary */
data brand_choice(keep=Brand Price Displ Feature Chosen PurchaseID);
array prices[4] priceprivate pricesunshine pricekeebler pricenabisco;
array displays[4] displprivate displsunshine displkeebler displnabisco;
array features[4] featprivate featsunshine featkeebler featnabisco;
array chosenbrand[4] private sunshine keebler nabisco;
array allbrands[4] $8 _temporary_ ('Private' 'Sunshine' 'Keebler' 'Nabisco');

 set reg.crackers_hw5;

 PurchaseID = _N_;

do i = 1 to 4;
     Brand = allbrands[i];
     Price = prices[i];
     Displ = displays[i];
     Feature = features[i];
     Chosen = chosenbrand[i];
                output;
end;

/* Step 1: Prepare the Dataset */
proc surveyselect data=brand_choice
    out=brand_choice_sampled
    outall
    samprate=0.75
    seed=1
    method=srs;
run;

data brand_choice_sampled;
    set brand_choice_sampled;
    if selected = 1 then choiceT = chosen;
    else choiceT = .;
run;

/* Step 2: Estimate the Model */
proc mdc data=brand_choice_sampled;
   class displ feature;
    model choiceT = price displ feature displ*feature / type = mprobit nchoice = 4;
    id PurchaseID;
    restrict Displ1 = 0, Feature1 = 0, Displ1Feature1 = 0;
    output out=pred_probs p=predicted_prob; 
run;

/* Step 3: View the Predicted Probabilities */
proc print data=pred_probs;
    var predicted_prob; 
    id PurchaseID;
run;

And getting this error:

ERROR: The NCHOICE= option is not allowed when the number of choices for each individual (ID) is not the same.

Here is what my dataset looks like after the reformat:

a snapshot of my dataset after transformation


Solution

  • I'm not sure exactly what happened. But, this is the corrected code that produced the expected results:

    /* c. Reformat data as necessary */
    data brand_choice(keep=Brand Price Displ Feature Chosen PurchaseID);
    array prices[4] priceprivate pricesunshine pricekeebler pricenabisco;
    array displays[4] displprivate displsunshine displkeebler displnabisco;
    array features[4] featprivate featsunshine featkeebler featnabisco;
    array chosenbrand[4] private sunshine keebler nabisco;
    array allbrands[4] $8 _temporary_ ('Private' 'Sunshine' 'Keebler' 'Nabisco');
    
     set reg.crackers_hw5;
    
     PurchaseID = _N_;
    
    do i = 1 to 4;
         Brand = allbrands[i];
         Price = prices[i];
         Displ = displays[i];
         Feature = features[i];
         Chosen = chosenbrand[i];
                    output;
    end;
    
    /* Prepare the Dataset */
    proc surveyselect data=brand_choice
        out=brand_choice_sampled
        outall
        samprate=0.75
        seed=1
        method=srs;
    run;
    
    data brand_choice_sampled;
        set brand_choice_sampled;
        if selected = 1 then choiceT = chosen;
        else choiceT = .;
    run;
    
    /* Estimate the Model */
    proc mdc data=brand_choice_sampled;
       class displ feature;
        model chosen = price displ feature displ*feature / type = clogit nchoice = 4;
        id PurchaseID;
        restrict Displ1 = 0, Feature1 = 0, Displ1Feature1 = 0;
        output out=pred_probs p=predicted_prob; 
    run;
    
    
    /* View the Predicted Probabilities */
    proc print data=pred_probs;
        var predicted_prob; 
        id PurchaseID;
    run;