I have a dataframe df1
(its dput
is placed at the bottom to save space), and I am running the DFS analysis as follows:
require(survival)
require(survminer)
require(ggplot2)
cox_fit <- coxph(Surv(DFS,Relapsed) ~ Gender + Stage + MSI, data=df1)
ggforest(cox_fit, data = df1, refLabel = "Reference Group")
You can see that Gender
and MSI
variables have one level shown as "Reference Group", as expected - but Stage
variable does not have it, and I don't understand why. When I just print cox_fit
object, it looks fine with all three variables:
cox_fit
# Call:
# coxph(formula = Surv(DFS, Relapsed) ~ Gender + Stage + MSI, data = df1)
#
# coef exp(coef) se(coef) z p
# GenderMale 0.7952 2.2149 0.2916 2.727 0.00639
# StageIII 1.8824 6.5693 0.4342 4.335 1.46e-05
# MSIMSI-Hi 0.4551 1.5763 0.3677 1.238 0.21579
#
# Likelihood ratio test=35.43 on 3 df, p=9.872e-08
# n= 337, number of events= 57
I don't understand what's special about Stage
variable that makes it look weird on the forest plot.
df1 <- structure(
list(
DFS = c(474,280,439,468,155,390,484,432,254,453,375,426,540,236,396,315,255,444,336,414,394,362,376,280,366,346,163,363,308,297,253,288,261,111,288,339,274,282,136,213,85,113,35,76,67,75,28,136,1339,1238,1324,117,75,1191,1234,1222,1320,1162,651,1234,1224,270,1233,1266,1071,1149,1159,1140,1224,1071,1174,1056,561,1022,315,1110,1081,1183,1166,1085,1171,689,86,1048,1087,527,539,1199,1106,1102,1113,1108,870,228,1019,89,1056,1140,1063,1082,193,1030,777,231,137,394,1079,408,994,100,805,978,1022,518,36,516,634,930,934,847,904,345,817,852,342,97,663,1031,888,349,684,188,431,114,994,922,1030,743,935,888,287,915,613,927,2,137,842,793,770,94,160,884,892,711,679,676,409,924,806,887,978,855,189,801,518,843,807,944,657,775,988,891,756,1013,892,404,883,738,853,161,496,241,870,704,97,677,865,862,650,722,873,737,851,816,891,603,726,65,833,789,93,787,812,611,505,715,833,756,707,710,149,828,830,678,371,78,210,874,658,564,705,734,648,729,223,440,752,789,863,222,563,540,794,160,651,792,578,781,586,351,623,487,442,807,673,716,369,735,301,255,729,514,701,116,160,654,226,700,751,672,52,728,423,766,627,217,784,161,607,357,606,722,152,631,365,416,590,28,573,590,656,114,166,420,196,538,693,623,676,65,725,626,646,627,681,8,680,65,655,157,613,620,693,153,630,147,645,232,591,494,646,595,291,604,629,424,251,499,513,424,487,592,555,570,543,491,43,501,563,558,469,645,511,453,475,427,314),
Relapsed = c(T,T,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,T,T,F,F,F,T,T,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,T,T,F,F,F,F,F,F,T,F,F,T,F,F,F,F,F,T,F,T,F,F,T,F,F,F,F,F,F,T,T,F,F,T,F,F,F,T,F,F,F,T,F,F,F,F,F,F,F,F,F,F,T,T,F,F,F,T,F,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,T,T,T,F,F,F,F,F,F,F,F,F,F,F,F,F,F,T,F,F,T,F,F,F,F,F,F,F,F,F,T,F,F,T,T,T,F,F,F,F,F,F,F,F,T,T,F,F,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,T,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,F,F,T,F,F,F,F,T,F,F,F,F,T,F,F,F,F,F,T,F,F,F,F,F,F,F,F,F,F,F,T,F,T,F,T,F,F,F,F,F,F,F,T,F,T,F,F,F,F,F,F,T,T,F,T,F,F,F,F,F,F,F,T,F,F,F,F,F,F,F,F,T),
Gender = structure(
c(2,2,2,1,1,1,2,2,2,1,1,2,1,2,1,1,1,2,2,1,1,1,2,2,2,1,2,1,2,1,2,1,2,1,2,1,1,2,2,2,1,1,2,2,2,1,1,1,2,2,2,2,2,2,1,1,2,2,2,1,1,2,2,2,2,1,1,1,1,1,2,1,2,1,1,1,1,2,1,1,1,1,1,2,1,2,1,2,1,1,1,1,2,2,2,2,2,2,1,2,2,1,1,2,1,2,1,2,1,1,2,2,2,1,2,2,1,2,1,2,2,2,2,1,2,1,2,1,2,2,1,2,2,2,1,2,2,2,1,2,1,2,1,2,2,2,2,1,1,1,1,2,1,2,1,2,2,2,2,2,1,2,1,1,2,1,1,2,1,1,2,1,2,2,2,2,2,1,2,1,2,2,2,1,2,2,2,2,2,1,1,1,1,2,2,1,1,2,2,2,2,1,2,2,2,2,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,1,2,2,2,1,1,1,1,1,1,2,2,2,1,1,2,2,2,1,1,1,2,1,2,1,2,2,1,1,1,1,1,2,2,1,1,1,1,1,2,2,1,2,1,1,1,2,2,1,1,2,2,2,2,1,2,1,2,2,1,2,1,1,2,2,1,1,1,1,2,1,1,2,1,2,1,2,2,2,2,1,2,1,2,1,2,1,2,2,1,1,1,2,2,2,2,1,1,2,2,1,1,1,2,2,1,2,2,2,1,2,2),
levels = c("Female","Male"),
class = "factor"
),
Stage = structure(
c(1,2,1,1,1,2,1,2,1,2,2,2,2,2,2,1,2,2,1,2,1,1,2,2,2,2,2,1,1,1,1,1,2,1,1,2,1,2,1,2,2,2,2,2,1,1,2,2,1,1,2,2,1,2,2,2,2,1,1,2,2,2,1,2,2,2,1,1,1,2,2,1,2,2,2,2,1,2,2,2,1,2,2,1,2,2,1,2,2,1,2,2,2,1,1,1,1,1,2,1,2,2,2,2,2,1,2,1,1,2,1,2,1,1,2,2,2,2,2,1,2,1,2,2,2,2,2,2,2,2,1,2,1,2,2,1,1,2,2,1,1,1,1,2,1,2,2,2,1,2,1,1,1,2,2,1,1,1,2,2,2,1,2,1,1,1,1,1,1,2,2,2,1,2,2,2,1,1,1,2,2,2,2,1,2,1,2,1,2,2,1,2,1,2,2,1,2,2,1,1,2,1,2,2,1,2,1,2,2,2,2,2,1,2,2,2,1,2,1,2,2,1,1,1,2,2,1,2,2,1,2,2,2,1,2,1,2,1,2,2,2,1,2,2,1,2,1,1,2,2,1,2,2,2,2,2,2,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,1,2,2,2,2,1,2,2,1,2,2,1,2,1,1,2,1,1,2,1,1,1,2,2,2,1,2,2,1,2,2,2,1,1,2,2,2,2,2,1,2,2,2,2,2,1,2,1,1,2,2,2,2,2,2,1,1,2,1,1,2,1,2,2),
levels = c("II","III"),
class = "factor"
),
MSI = structure( c(1,1,1,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,2,1,2,2,2,1,1,2,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,2,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,1,1,2,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,2,1,1,1,1,2,1,1,1,1,1,1,2,1,1,2,2,1,1,1,2,1,2,1,2,1,2,1,2,2,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,2,1,1,1,2,1,1,1,1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,2,1,1,1,1,2,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,2,1,1,2,1,1 ),
levels = c("MSS","MSI-Hi"),
class = "factor"
)
),
class = "data.frame")
This line does a partial match of "StageII" with "StageIII" on the row names.
coef <- structure(list(term = c("GenderMale", "StageIII", "MSIMSI-Hi"
), estimate = c(0.795207848408587, 1.88240540447955, 0.455091789862389
), std.error = c(0.291625356633886, 0.43423050039025, 0.36766034424876
), statistic = c(2.72681311936435, 4.33503727349369, 1.237804938665
), p.value = c(0.00639492461449034, 1.45735426457274e-05, 0.215788396790699
), conf.low = c(0.223632652427522, 1.03132926272586, -0.265509243408779
), conf.high = c(1.36678304438965, 2.73348154623325, 1.17569282313356
)), row.names = c("GenderMale", "StageIII", "MSIMSI-Hi"), class = "data.frame")
inds <- c("GenderFemale", "GenderMale", "StageII", "StageIII", "MSIMSS", "MSIMSI-Hi")
coef[inds, ]
# term estimate std.error statistic p.value conf.low conf.high
# NA <NA> NA NA NA NA NA NA
# GenderMale GenderMale 0.7952078 0.2916254 2.726813 6.394925e-03 0.2236327 1.366783
# StageIII StageIII 1.8824054 0.4342305 4.335037 1.457354e-05 1.0313293 2.733482
# StageIII.1 StageIII 1.8824054 0.4342305 4.335037 1.457354e-05 1.0313293 2.733482
# NA.1 <NA> NA NA NA NA NA NA
# MSIMSI-Hi MSIMSI-Hi 0.4550918 0.3676603 1.237805 2.157884e-01 -0.2655092 1.175693
This was the intended result
# term estimate std.error statistic p.value conf.low conf.high
# NA <NA> NA NA NA NA NA NA
# GenderMale GenderMale 0.7952078 0.2916254 2.726813 6.394925e-03 0.2236327 1.366783
# NA.1 <NA> NA NA NA NA NA NA
# StageIII StageIII 1.8824054 0.4342305 4.335037 1.457354e-05 1.0313293 2.733482
# NA.2 <NA> NA NA NA NA NA NA
# MSIMSI-Hi MSIMSI-Hi 0.4550918 0.3676603 1.237805 2.157884e-01 -0.2655092 1.175693
Something like this would be better
coef[match(inds, rownames(coef)), ]
# term estimate std.error statistic p.value conf.low conf.high
# NA <NA> NA NA NA NA NA NA
# GenderMale GenderMale 0.7952078 0.2916254 2.726813 6.394925e-03 0.2236327 1.366783
# NA.1 <NA> NA NA NA NA NA NA
# StageIII StageIII 1.8824054 0.4342305 4.335037 1.457354e-05 1.0313293 2.733482
# NA.2 <NA> NA NA NA NA NA NA
# MSIMSI-Hi MSIMSI-Hi 0.4550918 0.3676603 1.237805 2.157884e-01 -0.2655092 1.175693
You could use labels that will not partially match until the bug is fixed, eg, use 2/3 instead of II/III