pythonpandasdataframescikit-learnknn

KNN algorithm throws ValueError: Unknown label type: 'continuous'


import pandas as pd
from sklearn.preprocessing import LabelEncoder
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler

path = "/content/cirrhosis.csv"
data = pd.read_csv(path)

data = data.loc[0:311]
data.head()

for col in data.columns:
  if data[col].dtype == 'int64' or data[col].dtype == 'float64':
    data[col].fillna(data[col].mean(), inplace=True)

  elif data[col].dtype == 'object':
    data[col].fillna(data[col].mode(), inplace=True)

label_encoder = LabelEncoder()
for column in data.columns:
    if data[column].dtype == 'object':
        data[column] = label_encoder.fit_transform(data[column])
print(data)

scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)
data = pd.DataFrame(scaled_data, columns=data.columns)

inputs = data.drop(['ID', 'Stage'],axis=1)
output = data.drop(['ID', 'N_Days', 'Status', 'Drug', 'Age', 'Sex', 'Ascites', 'Hepatomegaly', 'Spiders', 'Edema', 'Bilirubin', 'Cholesterol', 'Albumin', 'Copper', 'Alk_Phos', 'SGOT', 'Tryglicerides', 'Platelets', 'Prothrombin'], axis=1)
print(inputs)
print(output)

x_train, x_test, y_train, y_test = train_test_split(inputs, output, train_size=0.8)

model =  KNeighborsClassifier(n_neighbors=31)
model.fit(x_train,y_train)
y_pred = model.predict(x_test)

I was trying to improve the accuracy of the KNN model so I tried to perform feature Scaling But when I did perform feature scaling and tried to train my model using model.fit() it throws an ValueError the algorithm works if I don't perform Feature Scaling, But throws ValueError when I perform Feature scaling

/usr/local/lib/python3.10/dist-packages/sklearn/neighbors/_classification.py:215: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().
  return self._fit(X, y)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-73-f656e2af91bb> in <cell line: 2>()
      1 model =  KNeighborsClassifier(n_neighbors=31)
----> 2 model.fit(x_train,y_train)
      3 y_pred = model.predict(x_test)
      4 print(y_pred)
      5 print(y_test)

2 frames
/usr/local/lib/python3.10/dist-packages/sklearn/utils/multiclass.py in check_classification_targets(y)
    216         "multilabel-sequences",
    217     ]:
--> 218         raise ValueError("Unknown label type: %r" % y_type)
    219 
    220 

ValueError: Unknown label type: 'continuous'

Solution

  • Can you please check that your response variables are continuous or not ? You are performing classification task so continuous variables in y_train or y_test may cause the error. Maybe scaling whole data caused this error and your target variable became continuous.

    Your response variable should be categorical like 0/1 or Yes/No or etc.