Please find my data sample `e` below.
Problem: continuous covariates are being treated as categorical covariates in Cox Regression
Question: I need to (1) keep all .0
on all integers but in such a way that (2) e$Ki67
and e$age
remain continuous in my Cox Regression. How can this be done?
I recently received great help here on SO, as I wanted to add .0
to all integers in a column containing both integers and decimals.
The solution was
e$Ki67 <- sprintf("%0.1f", as.numeric(as.character(e$Ki67)))
e$age <- sprintf("%0.1f", as.numeric(as.character(e$age)))
So that
> head(e)
rfs Ki67 WHO simpson age sex rad.dose recurrence dead os
1 25.33 0.6 1 1 43.0 1 0 1 0 214.67
2 207.93 3.3 2 2 76.0 1 0 0 1 207.93
3 80.00 1.0 2 1 79.0 1 0 0 1 80.00
4 47.77 0.6 1 3 84.0 1 0 1 1 52.67
5 193.25 0.6 1 1 62.0 1 0 0 1 193.25
6 6.08 3.7 2 3 71.0 1 0 1 1 65.54
However, when running my Cox Regression, I receive this error:
In fitter(X, Y, strata = Strata, offset = offset, weights = weights, : Ran out of iterations and did not converge
It seems that `cph` from the `rms` package now considers `e$Ki67` and `e$age` categorical covariates, but they are in fact continuous covariates:
> summary(a)
Effects Response : Surv(rfs, recurrence)
Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
rad.dose 0 53.24 53.24 -8.7557e+00 1.0340e+01 -2.9021e+01 1.1510e+01
Hazard Ratio 0 53.24 53.24 1.5757e-04 NA 2.4899e-13 9.9714e+04
Ki67 - 0.2:0.5 4 1.00 NA -3.4399e+01 1.0790e+00 -3.6514e+01 -3.2284e+01
Hazard Ratio 4 1.00 NA 1.1498e-15 NA 1.3873e-16 9.5298e-15
Ki67 - 0.3:0.5 4 2.00 NA -2.7546e+01 1.6863e+01 -6.0596e+01 5.5041e+00
Hazard Ratio 4 2.00 NA 1.0887e-12 NA 4.8242e-27 2.4569e+02
Ki67 - 0.4:0.5 4 3.00 NA 5.8874e+00 7.6362e+00 -9.0793e+00 2.0854e+01
Hazard Ratio 4 3.00 NA 3.6046e+02 NA 1.1400e-04 1.1398e+09
Ki67 - 0.6:0.5 4 5.00 NA 2.8224e-02 1.4846e+00 -2.8816e+00 2.9380e+00
Hazard Ratio 4 5.00 NA 1.0286e+00 NA 5.6047e-02 1.8878e+01
Ki67 - 0.7:0.5 4 6.00 NA 9.0075e+00 2.5211e+00 4.0662e+00 1.3949e+01
Hazard Ratio 4 6.00 NA 8.1643e+03 NA 5.8336e+01 1.1426e+06
Ki67 - 0.8:0.5 4 7.00 NA -3.4891e-01 3.0083e+00 -6.2451e+00 5.5473e+00
Hazard Ratio 4 7.00 NA 7.0545e-01 NA 1.9399e-03 2.5654e+02
So; I need to (1) keep all .0
on all integers but in such a way that (2) e$Ki67
and e$age
remain continuous in my Cox Regression.
I have used the following script:
# Format Ki67 and age with one decimal place so integers show a trailing ".0".
# After this step both columns are character vectors (see the str(e) output
# further down), which is why cph no longer treats them as continuous.
e$Ki67 <- sprintf("%0.1f", as.numeric(as.character(e$Ki67)))
e$age <- sprintf("%0.1f", as.numeric(as.character(e$age)))
library(rms)
# Build the datadist object and register it through options() for rms.
# NOTE(review): this is computed before the as.factor() conversions below and
# after Ki67/age were turned into strings above.
d <- datadist(e)
options(datadist="d")
# Treat WHO, simpson and sex as categorical covariates.
e$WHO <- as.factor(e$WHO)
e$simpson <- as.factor(e$simpson)
e$sex <- as.factor(e$sex)
# Cox regression with rfs as the time variable and recurrence as the event
# indicator; surv/x/y = TRUE are passed so cph keeps those components in the fit.
a <- cph(Surv(rfs,recurrence)~Ki67+simpson+WHO+age+sex+rad.dose,data=e,surv=TRUE,x=TRUE,y=TRUE)
With the following data e
:
e <- structure(list(rfs = c(25.33, 207.93, 80, 47.77, 193.25, 6.08,
0.69, 174.85, 30.75, 27.27, 162.27, 204.98, 122.81, 20.53, 22.28,
197.65, 94.23, 195.94, 92.19, 6.93, 193.38, 14.09, 152.38, 49.15,
190.46, 50.56, 66.76, 188.58, 188.42, 78.65, 125.77, 176.59,
185.69, 185.23, 184.71, 184.31, 183.59, 181.49, 96.53, 180.63,
30.16, 65.71, 179.48, 122.61, 177.35, 176.66, 0.13, 67.15, 175.31,
86.74, 174.65, 169.53, 169.23, 41.99, 168.77, 167.69, 56.71,
163.84, 163.81, 162.69, 162.63, 162.37, 119.59, 160.1, 159.47,
12.42, 155.56, 155.47, 155.27, 154.87, 154.61, 128.43, 56.51,
150.67, 50.79, 47.93, 83.58, 146.1, 144.69, 159.31, 140.58, 136.64,
135.52, 88.41, 134.11, 134.18, 133.49, 131.81, 77.04, 130.6,
63.87, 62.98, 88.51, 123.5, 122.45, 121.72, 121.69, 120.57, 1.54,
114.79), Ki67 = c("0.6", "3.3", "1.0", "0.6", "0.6", "3.7", "1.4",
"1.1", "1.8", "1.6", "0.7", "0.5", "0.3", "1.7", "0.5", "1.2",
"4.1", "0.6", "1.4", "1.3", "1.8", "2.6", "0.7", "0.8", "1.0",
"0.7", "0.7", "2.1", "1.3", "2.7", "1.3", "0.8", "1.1", "1.8",
"1.8", "0.4", "0.9", "6.4", "1.7", "1.5", "0.6", "2.7", "0.4",
"1.5", "1.4", "1.8", "2.3", "0.7", "2.4", "1.2", "0.6", "0.8",
"3.0", "4.0", "0.5", "1.2", "5.1", "1.5", "0.6", "1.2", "1.7",
"0.7", "1.4", "2.7", "1.1", "0.9", "0.5", "0.7", "0.9", "0.4",
"0.8", "0.8", "0.5", "0.9", "0.5", "1.2", "1.4", "2.5", "2.7",
"4.2", "0.8", "0.5", "1.7", "1.2", "1.6", "0.5", "2.6", "2.0",
"3.9", "0.6", "0.2", "0.5", "0.8", "0.5", "0.5", "0.6", "1.4",
"0.9", "1.0", "1.8"), WHO = structure(c(1L, 2L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3"), class = "factor"), simpson = structure(c(1L, 2L, 1L,
3L, 1L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L,
1L, 2L, 2L, 2L, 3L, 3L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L,
1L, 2L, 2L, 2L, 2L, 3L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 1L,
2L, 2L, 3L, 1L, 1L, 2L, 1L, 3L, 3L, 3L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 3L, 1L, 2L, 2L, 3L, 2L, 3L, 3L, 2L, 1L, 2L, 2L, 3L, 2L, 2L,
2L), .Label = c("1", "2", "3"), class = "factor"), age = c("43.0",
"76.0", "79.0", "84.0", "62.0", "71.0", "75.0", "69.0", "53.0",
"70.0", "56.0", "45.0", "77.0", "72.0", "56.0", "59.0", "84.0",
"72.0", "83.0", "80.0", "49.0", "50.0", "68.0", "49.0", "46.0",
"50.0", "73.0", "51.0", "45.0", "42.0", "73.0", "56.0", "63.0",
"30.0", "67.0", "56.0", "58.0", "72.0", "51.0", "49.0", "68.0",
"65.0", "60.0", "64.0", "52.0", "65.0", "76.0", "78.0", "74.0",
"39.0", "30.0", "66.0", "58.0", "49.0", "67.0", "53.0", "69.0",
"41.0", "42.0", "66.0", "57.0", "52.0", "25.0", "64.0", "48.0",
"51.0", "47.0", "46.0", "44.0", "68.0", "41.0", "76.0", "65.0",
"60.0", "46.0", "54.0", "66.0", "42.0", "46.0", "83.0", "72.0",
"54.0", "51.0", "77.0", "58.0", "49.0", "52.0", "66.0", "50.0",
"32.0", "71.0", "45.0", "68.0", "55.0", "55.0", "44.0", "27.0",
"61.0", "76.0", "47.0"), sex = structure(c(2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L,
2L, 2L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 1L,
2L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L,
1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("0",
"1"), class = "factor"), rad.dose = c(0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5.4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5.4, 0, 0, 53.24, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5.4, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), recurrence = c(1L, 0L, 0L,
1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), dead = c(0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L,
0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 1L,
0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L), os = c(214.67, 207.93, 80, 52.67,
193.25, 65.54, 0.69, 174.85, 206.29, 27.27, 162.27, 204.98, 122.81,
49.94, 22.28, 197.65, 94.23, 195.94, 92.19, 6.93, 193.38, 18.37,
152.38, 49.15, 190.46, 144.07, 66.76, 188.58, 188.42, 78.65,
125.77, 176.59, 185.69, 185.23, 184.71, 184.31, 183.59, 181.49,
96.53, 180.63, 112.92, 179.88, 179.48, 122.61, 177.35, 176.66,
0.13, 67.15, 175.31, 174.82, 174.65, 169.53, 169.23, 168.8, 168.77,
167.69, 167.29, 163.84, 163.81, 162.69, 162.63, 162.37, 162.2,
160.1, 159.47, 157.4, 155.56, 155.47, 155.27, 154.87, 154.61,
128.43, 56.51, 150.67, 148.73, 147.98, 146.83, 146.1, 144.69,
159.31, 140.58, 136.64, 135.52, 125.77, 134.11, 134.18, 133.49,
131.81, 77.04, 130.6, 63.87, 126.78, 88.51, 123.5, 122.45, 121.72,
121.69, 120.57, 1.54, 114.79)), row.names = c(NA, 100L), class = "data.frame")
I think you have a few issues but the main one is that the variables you are interested in are character variables:
str(e)
'data.frame': 100 obs. of 10 variables:
$ rfs : num 25.3 207.9 80 47.8 193.2 ...
$ Ki67 : chr "0.6" "3.3" "1.0" "0.6" ...
$ WHO : Factor w/ 3 levels "1","2","3": 1 2 2 1 1 2 2 1 1 1 ...
$ simpson : Factor w/ 3 levels "1","2","3": 1 2 1 3 1 3 2 2 2 2 ...
$ age : chr "43.0" "76.0" "79.0" "84.0" ...
$ sex : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 1 ...
$ rad.dose : num 0 0 0 0 0 0 0 0 0 0 ...
$ recurrence: int 1 0 0 1 0 1 0 0 1 0 ...
$ dead : int 0 1 1 1 1 1 1 1 1 1 ...
$ os : num 214.7 207.9 80 52.7 193.2 ...
Just change them to numeric so that cph
recognizes them as such:
# Convert the character columns back to numeric so that cph treats them as
# continuous covariates. Each column must be converted from itself: the
# original snippet assigned as.numeric(e$Ki67) to e$age, which overwrote the
# ages with the Ki67 values.
e$Ki67 <- as.numeric(e$Ki67)
e$age <- as.numeric(e$age)
You can't have a variable that is both continuous and character. I don't see the point of using `sprintf` here. If you want the formatting for presentation purposes, you could create a second, formatted copy of the variables and use that copy for tables etc., but I don't think that is necessary; the formatting only matters when presenting the data.
The bigger issue is that the model still won't run, but I think that's because you are overfitting here with a small sample; it may work on your full dataset.