I'm working with a dataset of stroke cases from a hospital, and I would like to run a survival analysis with Cox regression, using each patient's time of arrival at the hospital, time of leaving, and whether or not they survived. I also have lab data, sex, and age for each patient. Processing the data is pretty straightforward, but when I try to fit lifelines' CoxPHFitter to the data I get the error: "ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all()." Below is some information about the data I'm using:
X.info()
X.head()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4552 entries, 0 to 4551
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 CHOL 4552 non-null float64
1 sex 4552 non-null int64
2 age 4552 non-null int64
dtypes: float64(1), int64(2)
CHOL | Sex | Age | |
---|---|---|---|
0 | 4.9 | 1 | 95 |
1 | 4.9 | 1 | 91 |
2 | 4.1 | 0 | 50 |
3 | 5.6 | 0 | 79 |
4 | 4.9 | 0 | 57 |
I use T and E as:
T = df['survival_time'] # Time-to-event data: days between admission and leaving the hospital
E = df['event_occurred'] # Event indicator: 1 = death, 0 = survival
where T is the length of stay in days (date of leaving the hospital minus date of admission) and E is 1 if the patient died or 0 if the patient survived (I also tried True and False).
And this is how I use the CoxPHFitter:
from lifelines import CoxPHFitter
# Fit Cox proportional hazards model
coxph = CoxPHFitter()
# NOTE: T and E are pandas Series here (df['survival_time'], df['event_occurred']),
# and X holds only the covariates; this is the call that raises the ValueError.
coxph.fit(X, duration_col=T, event_col=E)
# NOTE(review): this calls summary as a method; confirm whether it is an attribute instead.
print(coxph.summary())
The line coxph.fit(X, duration_col=T, event_col=E)
generates this error : ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
Does anyone have an idea why I get this error?
full error log:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[150], line 5
3 # Fit Cox proportional hazards model
4 coxph = CoxPHFitter()
----> 5 coxph.fit(X, duration_col=T, event_col=E)
7 print(coxph.summary())
File ~/anaconda3/lib/python3.11/site-packages/lifelines/utils/__init__.py:56, in CensoringType.right_censoring.<locals>.f(model, *args, **kwargs)
53 @wraps(function)
54 def f(model, *args, **kwargs):
55 cls.set_censoring_type(model, cls.RIGHT)
---> 56 return function(model, *args, **kwargs)
File ~/anaconda3/lib/python3.11/site-packages/lifelines/fitters/coxph_fitter.py:290, in CoxPHFitter.fit(self, df, duration_col, event_col, show_progress, initial_point, strata, weights_col, cluster_col, robust, batch_mode, timeline, formula, entry_col, fit_options)
184 """
185 Fit the Cox proportional hazard model to a right-censored dataset. Alias of `fit_right_censoring`.
186
(...)
287
288 """
289 self.strata = utils._to_list_or_singleton(utils.coalesce(strata, self.strata))
--> 290 self._model = self._fit_model(
291 df,
292 duration_col,
293 event_col=event_col,
294 show_progress=show_progress,
295 initial_point=initial_point,
296 strata=self.strata,
297 weights_col=weights_col,
298 cluster_col=cluster_col,
299 robust=robust,
300 batch_mode=batch_mode,
301 timeline=timeline,
302 formula=formula,
303 entry_col=entry_col,
304 fit_options=fit_options,
305 )
306 return self
File ~/anaconda3/lib/python3.11/site-packages/lifelines/fitters/coxph_fitter.py:610, in CoxPHFitter._fit_model(self, *args, **kwargs)
608 def _fit_model(self, *args, **kwargs):
609 if self.baseline_estimation_method == "breslow":
--> 610 return self._fit_model_breslow(*args, **kwargs)
611 elif self.baseline_estimation_method == "spline":
612 return self._fit_model_spline(*args, **kwargs)
File ~/anaconda3/lib/python3.11/site-packages/lifelines/fitters/coxph_fitter.py:623, in CoxPHFitter._fit_model_breslow(self, *args, **kwargs)
619 model = SemiParametricPHFitter(
620 penalizer=self.penalizer, l1_ratio=self.l1_ratio, strata=self.strata, alpha=self.alpha, label=self._label
621 )
622 if utils.CensoringType.is_right_censoring(self):
--> 623 model.fit(*args, **kwargs)
624 return model
625 else:
File ~/anaconda3/lib/python3.11/site-packages/lifelines/utils/__init__.py:56, in CensoringType.right_censoring.<locals>.f(model, *args, **kwargs)
53 @wraps(function)
54 def f(model, *args, **kwargs):
55 cls.set_censoring_type(model, cls.RIGHT)
---> 56 return function(model, *args, **kwargs)
File ~/anaconda3/lib/python3.11/site-packages/lifelines/fitters/coxph_fitter.py:1229, in SemiParametricPHFitter.fit(self, df, duration_col, event_col, show_progress, initial_point, strata, weights_col, cluster_col, robust, batch_mode, timeline, formula, entry_col, fit_options)
1226 self.formula = formula
1227 self.entry_col = entry_col
-> 1229 X, T, E, weights, entries, original_index, self._clusters = self._preprocess_dataframe(df)
1231 self.durations = T.copy()
1232 self.event_observed = E.copy()
File ~/anaconda3/lib/python3.11/site-packages/lifelines/fitters/coxph_fitter.py:1305, in SemiParametricPHFitter._preprocess_dataframe(self, df)
1303 df = df.set_index(self.strata)
1304 else:
-> 1305 sort_by = [self.duration_col, self.event_col] if self.event_col else [self.duration_col]
1306 df = df.sort_values(by=sort_by)
1307 original_index = df.index.copy()
File ~/anaconda3/lib/python3.11/site-packages/pandas/core/generic.py:1527, in NDFrame.__nonzero__(self)
1525 @final
1526 def __nonzero__(self) -> NoReturn:
-> 1527 raise ValueError(
1528 f"The truth value of a {type(self).__name__} is ambiguous. "
1529 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
1530 )
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
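When you use the `fit` method of the `CoxPHFitter` class, it expects a single DataFrame that contains all the necessary columns: the covariates (CHOL, sex, and age) as well as the duration and event indicator columns. The `duration_col` and `event_col` arguments must then be the names of those columns (strings), not the Series themselves. You are not doing this: you pass the Series T and E, so lifelines ends up evaluating `if self.event_col` on a Series, which is exactly what raises the "truth value of a Series is ambiguous" error. You can solve it this way: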
from lifelines import CoxPHFitter
import pandas as pd
# Toy dataset: the covariates AND the duration/event columns live in one frame.
df = pd.DataFrame(
    {
        "CHOL": [4.9, 4.9, 4.1, 5.6, 4.9],
        "sex": [1, 1, 0, 0, 0],
        "age": [95, 91, 50, 79, 57],
        "survival_time": [20, 40, 15, 10, 30],
        "event_occurred": [1, 0, 1, 0, 1],
    }
)

# duration_col / event_col are given as column names (strings) of df;
# fit() returns the fitter itself, so construction and fitting can be chained.
coxph = CoxPHFitter().fit(
    df,
    duration_col="survival_time",
    event_col="event_occurred",
)

# Print the fitted coefficient table (read as an attribute, not called).
print(coxph.summary)
which gives you what you wanted:
coef exp(coef) se(coef) coef lower 95% coef upper 95% \
covariate
CHOL -1.067889 0.343734 5.069700 -11.004319 8.868542
sex -2.123784 0.119578 15.619846 -32.738120 28.490551
age 0.059477 1.061282 0.440625 -0.804131 0.923086
exp(coef) lower 95% exp(coef) upper 95% cmp to z \
covariate
CHOL 1.662972e-05 7.104912e+03 0.0 -0.210641
sex 6.053620e-15 2.362051e+12 0.0 -0.135967
age 4.474764e-01 2.517045e+00 0.0 0.134984
p -log2(p)
covariate
CHOL 0.833167 0.263322
sex 0.891847 0.165131
age 0.892625 0.163874