I have the following Python code to compute both AUC and plot the ROC graphic:
import numpy as np
import sklearn.metrics
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from matplotlib import pyplot
testY = np.array([1., 1., 2., 1., 1., 1., 2., 1., 1., 1., 2., 2., 1., 2., 1., 1., 2., 1., 2., 2., 1., 2., 1., 2.,
1., 2., 1., 1., 1., 1., 1., 2., 2., 2., 1., 1., 1., 1., 1., 1., 2., 2., 1., 1., 1., 1., 1., 2.,
1., 1., 1., 2., 1., 2., 2., 1., 2., 2., 2., 1., 2., 1., 2., 1., 1., 2., 1., 2., 2., 1., 1., 1.,
2., 2., 1., 1., 1., 2., 1., 1., 2., 2., 1., 1., 1., 2., 1., 1., 2., 1., 2., 1., 1., 2., 1., 1.,
1., 2., 1., 2., 1., 2., 2., 1., 2., 1., 1., 1., 1., 1., 2., 2.])
predY = np.array([1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00
, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00
, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 0.00000000e+00
, 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 0.00000000e+00
, 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.00000000e+00
, 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.00000000e+00
, 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00
, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00
, 6.02987735e-03, 0.00000000e+00, 1.00000000e+00, 0.00000000e+00
, 1.00000000e+00, 1.00000000e+00, 0.00000000e+00, 0.00000000e+00
, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 0.00000000e+00
, 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.00000000e+00
, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00
, 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 0.00000000e+00
, 0.00000000e+00, 1.00000000e+00, 1.00000000e+00, 0.00000000e+00
, 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 0.00000000e+00
, 0.00000000e+00, 1.00000000e+00, 0.00000000e+00, 1.17126142e-37
, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00
, 0.00000000e+00, 3.92363570e-44, 0.00000000e+00, 0.00000000e+00
, 1.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.00000000e+00
, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 4.22579080e-01
, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00
, 3.12829528e-14, 0.00000000e+00, 1.00000000e+00, 0.00000000e+00
, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00, 1.00000000e+00
, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00
, 1.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00
, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00
, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00])
print(testY.shape)
print(predY.shape)
print(predY.min(), predY.max())
lr_auc = roc_auc_score(testY, predY, labels= [1,2])
print(lr_auc)
ns_probs = np.empty( shape = predY.shape[0] )
ns_probs[:] = 0.5
ns_auc = roc_auc_score(testY, ns_probs, labels= [1,2])
print(ns_auc)
ns_fpr, ns_tpr, _ = roc_curve(testY, ns_probs, pos_label = 1)
lr_fpr, lr_tpr, _ = roc_curve(testY, predY, pos_label = 1)
print("ROC")
print("lr_fpr")
print(lr_fpr)
print("lr_tpr")
print(lr_tpr)
pyplot.plot(ns_fpr, ns_tpr, linestyle='--', label='')
pyplot.plot(lr_fpr, lr_tpr, marker='.', label="Positive label")
# axis labels
pyplot.xlabel('False Positive Rate')
pyplot.ylabel('True Positive Rate')
# show the legend
pyplot.legend()
# show the plot
pyplot.show()
This is the output I get:
(112,)
(112,)
0.0 1.0
0.5855614973262032 # <--- this is the AUC
0.5
ROC
lr_fpr
[0. 0.36363636 0.36363636 0.45454545 1. ]
lr_tpr
[0. 0.25 0.26470588 0.26470588 1. ]
And this is the plot I get:
So clearly the plot has less than 0.5 AUC, why there is this difference?
A few notes:
It appears that the plot is correct and the score is incorrect, assuming your predY
is supposed to be the probability of your positive class 1. From the documentation for roc_auc_score
:
y_score
[...] The probability estimates correspond to the probability of the class with the greater label