The below code prints the accuracy scores over 10 folds like below
from sklearn.datasets import load_digits, load_iris, load_breast_cancer, load_wine
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.utils import shuffle
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, zero_one_loss, confusion_matrix
import pandas as pd
import numpy as np
z = pd.read_csv('/home/user/datasets/iris_dataset.csv', header=0)
X = z.iloc[:, :-1]
y = z.iloc[:, -1:]
X = np.array(X)
y = np.array(y)
# Performing standard scaling
scaler = preprocessing.MinMaxScaler()
X_scaled = scaler.fit_transform(X)
# Defining the SVM with 'rbf' kernel
svc = SVC(kernel='rbf',random_state=50)
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.10, shuffle=True)
skf = StratifiedKFold(n_splits=10, shuffle=True)
acc_score = []
#skf.get_n_splits(X, y)
for train_index, test_index in skf.split(X, y):
X_train, X_test = X_scaled[train_index], X_scaled[test_index]
y_train, y_test = y[train_index], y[test_index]
# Training the model
svc.fit(X_train, np.ravel(y_train))
# Prediction on test dataste
y_pred = svc.predict(X_test)
# Obtaining the accuracy scores of the model
score = accuracy_score(y_test, y_pred)
acc_score.append(score)
print(acc_score)
The output of the code (acc_score) looks like below :
[1.0, 0.9333333333333333, 1.0, 0.9333333333333333, 0.9333333333333333, 1.0, 0.9333333333333333, 0.9333333333333333, 1.0, 1.0]
From this how can I calculate the standard error
for these accuracy results and average accuracy
using NumPy
and sklearn
in Python ? I wish to print the standard error of these accuracy scores along with the average accuracy
To calculate the standard error of the mean (or standard error of measurement), scipy could be used:
import scipy
from scipy import stats
acc_score = [1.0, 0.9333333333333333, 1.0, 0.9333333333333333, 0.9333333333333333, 1.0, 0.9333333333333333, 0.9333333333333333, 1.0, 1.0]
print('ACC std:', '{0:0.2f}'.format(scipy.stats.sem(acc_score)))