I am working on a machine learning project, in a Jupyter Notebook, that predicts the price of electric cars.
I run this cell on Jupyter Notebook:
# Predict on df2. As the traceback below shows, scikit-learn converts the frame
# to a float array first, so any cell that still holds a string raises ValueError.
p = regressor.predict(df2)
I get this error:
ValueError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_16424\818753220.py in <module>
----> 1 p = regressor.predict(df2)
~\.conda\envs\electricvehiclepriceprediction\lib\site-packages\sklearn\tree\_classes.py in predict(self, X, check_input)
465 """
466 check_is_fitted(self)
--> 467 X = self._validate_X_predict(X, check_input)
468 proba = self.tree_.predict(X)
469 n_samples = X.shape[0]
~\.conda\envs\electricvehiclepriceprediction\lib\site-packages\sklearn\tree\_classes.py in _validate_X_predict(self, X, check_input)
431 """Validate the training data on predict (probabilities)."""
432 if check_input:
--> 433 X = self._validate_data(X, dtype=DTYPE, accept_sparse="csr", reset=False)
434 if issparse(X) and (
435 X.indices.dtype != np.intc or X.indptr.dtype != np.intc
~\.conda\envs\electricvehiclepriceprediction\lib\site-packages\sklearn\base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
564 raise ValueError("Validation should be done on X, y or both.")
565 elif not no_val_X and no_val_y:
--> 566 X = check_array(X, **check_params)
567 out = X
568 elif no_val_X and not no_val_y:
~\.conda\envs\electricvehiclepriceprediction\lib\site-packages\sklearn\utils\validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
744 array = array.astype(dtype, casting="unsafe", copy=False)
745 else:
--> 746 array = np.asarray(array, order=order, dtype=dtype)
747 except ComplexWarning as complex_warning:
748 raise ValueError(
~\.conda\envs\electricvehiclepriceprediction\lib\site-packages\pandas\core\generic.py in __array__(self, dtype)
1991
1992 def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
-> 1993 return np.asarray(self._values, dtype=dtype)
1994
1995 def __array_wrap__(
ValueError: could not convert string to float: 'N/'
What did I try?
I tried using the following code:
# Cap 'Base MSRP' at twice its 99th percentile (nanpercentile ignores NaN values).
uv = np.nanpercentile(df2['Base MSRP'], [99])[0] * 2
# Assign through .loc: the chained form df2['Base MSRP'][mask] = uv may write to
# a temporary copy and leave df2 unchanged.
df2.loc[df2['Base MSRP'] > uv, 'Base MSRP'] = uv

le = preprocessing.LabelEncoder()
cols = ['County', 'City', 'State', 'ZIP Code', 'Model Year', 'Make', 'Model', 'Electric Vehicle Type', 'Clean Alternative Fuel Vehicle (CAFV) Eligibility']
for col in cols:
    # Fit the encoder on the column from `t`, then apply that mapping to df2.
    # NOTE(review): presumably `t` is the training frame - confirm.
    le.fit(t[col])
    df2[col] = le.transform(df2[col])
    print(le.classes_)

regressor.fit(x, y)
# NOTE(review): predict() casts df2 to float, so a string left in any column of
# df2 (for example the 'N/' placeholder) still raises the ValueError shown above.
p = regressor.predict(df2)
Here is the link to my notebook: https://github.com/SteveAustin583/electric-vehicle-price-prediction-revengers/blob/main/revengers.ipynb
How can I fix this issue?
I have found a solution. I dropped the rows whose 'Model Year' is "N/" and replaced the remaining "N/" placeholder values with -1, using this code:
# Drop the rows whose 'Model Year' is the 'N/' placeholder. The .copy() makes
# df2 an independent frame, so the column assignments below do not act on a
# slice of the original (which triggers SettingWithCopyWarning).
df2 = df2[df2['Model Year'] != 'N/'].copy()  # Filter out rows where 'Model Year' is 'N/'
for col in cols:
    # Use a string sentinel: putting the integer -1 into a column of strings
    # yields mixed types, which LabelEncoder refuses to sort and encode.
    df2[col] = df2[col].replace('N/', '-1')
    # NOTE(review): the codes assigned here come from df2 alone. If the
    # regressor was trained on codes from an encoder fitted on other data,
    # the two mappings may not line up - confirm against the training step.
    le.fit(df2[col])
    df2[col] = le.transform(df2[col])
    print(le.classes_)