I am having trouble creating a plot with several lines that have different colors and shapes, using the following dataset:
structure(list(age = structure(c(21, 45, 15, 16, 16, 16, 17,
17, 17, 18, 18, 8, 9, 19, 19, 10, 10, 20, 21, 21, 21, 22, 22,
22, 23, 24, 30, 42, 24, 24, 25, 25, 25, 26, 26, 26, 27, 27, 27,
28, 28, 32, 29, 29, 29, 30, 30, 30, 31, 31, 31, 32, 32, 32, 33,
33, 33, 34, 34, 34, 35, 35, 35, 36, 36, 36, 37, 37, 37, 38, 38,
38, 39, 39, 39, 40, 40, 40, 41, 41, 41, 42, 42, 42, 43, 43, 43,
44, 44, 45, 45, 45, 45, 46, 46, 46, 47, 47, 47, 48, 48, 48, 49,
49, 49, 50, 50, 50, 51, 51, 51, 52, 52, 52, 53, 53, 53, 54, 54,
54, 55, 52, 55, 56, 56, 56, 57, 57, 57, 58, 58, 58, 59, 59, 59,
60, 60, 60, 61, 61, 61, 62, 62, 62, 63, 63, 63, 64, 64, 64, 65,
65, 65, 61, 66, 66, 67, 67, 67, 68, 68, 68, 69, 69, 69, 70, 70,
70, 71, 72, 71, 72, 72, 72, 73, 73, 73, 74, 74, 74, 75, 75, 75,
76, 76, 74, 77, 77, 77, 78, 78, 78, 79, 79, 79, 80, 80, 80, 81,
81, 61, 82, 82, 82, 83, 83, 83, 84, 84, 84, 85, 85, 85, 86, 86,
86, 87, 87, 87, 88, 88, 88, 89, 89, 89, 90, 90, 90, 91, 91, 91,
92, 92, 92, 93, 93, 93, 94, 94, 94, 95, 95, 96, 96, 97, 98, 98,
99, 123), label = "age", format.stata = "%10.0g"), healthy = structure(c(0,
1, NA, 0, 1, NA, 0, 0, 1, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1,
NA, 0, 1, NA, 0, 1, NA, 0, 0, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA,
0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0,
1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1,
NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA,
0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0,
1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1,
NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA,
0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0,
1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1,
NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA,
0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0,
1, NA, 0, 1, NA, 0, 1, NA, 0, 1, NA, 0, 1, 0, 1, 0, 0, 1, 0,
0), label = "Has no health condition", format.stata = "%9.0g"),
col1 = c(NaN, NaN, NaN, NaN, NaN, 17.4959526062012,
NaN, NaN, 13.7867650985718, NaN, NaN, 17.1148929595947, NaN,
NaN, 21.7456340789795, NaN, NaN, 22.5867776870728, NaN, NaN,
19.4799966812134, NaN, NaN, 27.0873068896207, NaN, NaN, 25.2461756229401,
NaN, NaN, 24.2968890402052, NaN, NaN, 24.8148552349636, NaN,
NaN, 26.4674949645996, NaN, NaN, 28.1996012926102, NaN, NaN,
25.7581091680025, NaN, NaN, 28.0744308040988, NaN, NaN, 2.6226562442202,
NaN, NaN, 27.8483320304326, NaN, NaN, 31.5788489391929, NaN,
NaN, 28.7516339432959, NaN, NaN, 30.5037746810913, NaN, NaN,
29.9186396184175, NaN, NaN, 30.4269195417079, NaN, NaN, 7.7028050581614,
NaN, NaN, 29.524931703295, NaN, NaN, 29.4119287666522, NaN,
NaN, 27.8233588773813, NaN, NaN, 27.2971927142534, NaN, NaN,
29.1216611549503, NaN, NaN, 31.0940565321181, NaN, NaN, 8.9319949883681,
NaN, NaN, 29.896419574688, NaN, NaN, 28.0839774861055, NaN,
NaN, 28.9685502633816, NaN, NaN, 28.4690587390553, NaN, NaN,
28.9532592956056, NaN, NaN, 30.5288856335175, NaN, NaN, 29.1697682274712,
NaN, NaN, 30.3849462162365, NaN, NaN, 31.564100275437, NaN,
NaN, 31.2172098477681, NaN, NaN, 29.8669878641764, NaN, NaN,
29.011706361867, NaN, NaN, 34.0714236164952, NaN, NaN, 30.5197359085083,
NaN, NaN, 30.1599007987976, NaN, NaN, 29.1693642373179, NaN,
NaN, 30.4129105788011, 2.344, NaN, 31.452887409336, NaN, NaN,
33.9476460350884, NaN, NaN, 34.9800090471904, NaN, NaN, 31.3496222882657,
NaN, NaN, 33.3435718158506, NaN, NaN, 33.5722597038353, NaN,
NaN, 30.3194521300647, NaN, NaN, 34.8352882758431, NaN, NaN,
32.2088109652201, NaN, NaN, 36.3922643159565, NaN, NaN, 33.6284904612435,
NaN, NaN, 36.0349838032442, NaN, NaN, 37.3032214742311, NaN,
NaN, 35.8305793603261, NaN, NaN, 33.5542264086135, NaN, NaN,
37.3956505526667, NaN, NaN, 35.6306561260689, NaN, NaN, 34.1713064738682,
NaN, NaN, 34.4658223736671, NaN, NaN, 33.7924506975257, NaN,
NaN, 35.8171516060829, NaN, NaN, 32.4849494457245, NaN, NaN,
38.3814140768612, NaN, NaN, 34.3103677204677, NaN, NaN, 28.7768711447716,
NaN, NaN, 30.117017004225, NaN, NaN, 34.4370415551322, NaN,
NaN, 37.1509838104248, NaN, NaN, 39.7904551029205, NaN, NaN,
45.0848134358724, NaN, NaN, 38.9700946807861, NaN, NaN, 44.7834892272949,
NaN, NaN, 18.0415954589844, NaN, NaN, NaN, NaN, NaN, NaN,
NaN, NaN, NaN), col2 = c(9.73884888912769, 3.55150119134575,
NaN, 9.69603123615697, 3.53710220541273, NaN, 9.66741126191382,
3.51121828223117, NaN, 9.63565664291382, 3.48193562582294,
NaN, 9.45292086436831, 3.47054993978111, NaN, 9.59095467716815,
3.4745261718521, NaN, 9.39332915389019, 3.4410732194279,
NaN, 9.45476519597041, 3.43423518675641, NaN, 9.36457543146043,
3.41311541986672, NaN, 9.36624081511247, 3.4122371215694,
NaN, 9.40900721373381, 3.40614468643151, NaN, 9.38348010778427,
3.38705124429694, NaN, 9.34997592101226, 3.38823599217939,
NaN, 9.35146037391994, 3.36121070384979, NaN, 9.305238659099,
3.36387685238654, NaN, 9.24218050638835, 3.36620352413733,
NaN, 9.28473771413167, 3.34838822679441, NaN, 9.12819498328752,
3.3376117037953, NaN, 9.16214960674907, 3.34076386559792,
NaN, 9.20669736862183, 3.33245655816638, NaN, 9.16058754702227,
3.34403007896617, NaN, 9.19735798665455, 3.31413558165537,
NaN, 9.09941055815099, 3.30938078784778, NaN, 9.17626369700712,
3.30820650180788, NaN, 9.10454079223006, 3.32709397527528,
NaN, 9.18342437010545, 3.31390441284663, NaN, 9.15841562379666,
3.31152755907661, NaN, 9.06430034915896, 3.31614059893811,
NaN, 9.15609089195306, 3.30047230137553, NaN, 9.14333961931474,
3.31495148878472, NaN, 9.18908043650838, 3.31331432326157,
NaN, 9.13065004079355, 3.33573157658996, NaN, 9.09754027804813,
3.33558044968891, NaN, 9.20422429604964, 3.33170155550341,
NaN, 9.24062031539029, 3.34154178537922, NaN, 9.24709942419204,
3.35687677938486, NaN, 9.31868435786321, 3.36500219054192,
NaN, 9.26267687479655, 3.38257493265543, NaN, 9.34868538379669,
3.39329878659382, NaN, 9.39233847531405, 3.39171330950282,
NaN, 9.39422841092725, 3.42206691953289, NaN, 9.44792991372483,
3.41481578063612, NaN, 9.51408845773015, 3.44435220991933,
NaN, 9.52255477905273, 3.46103483781632, NaN, 9.54275298855968,
3.47304158897723, NaN, 9.58754419846968, 3.50915765865764,
NaN, 9.74308764395402, 3.50377081416107, NaN, 9.72430517088692,
3.53841964457851, NaN, 9.59207115447122, 3.55271640356007,
NaN, 9.72684358766941, 3.56763132344122, NaN, 9.87622256372489,
3.57748130932605, NaN, 9.86901096675707, 3.59796099073431,
NaN, 9.89924946358676, 3.63481671815923, NaN, 10.0701293576743,
3.63426666021913, NaN, 10.0823068306825, 3.65953091444696,
NaN, 10.13834406363, 3.70572604793357, NaN, 10.2659117362403,
3.74113381209494, NaN, 10.3758992457019, 3.75834940744368,
NaN, 10.4749858366045, 3.80154993331019, NaN, 10.4794574134094,
3.82382008481566, NaN, 10.5577326643056, 3.86617674629581,
NaN, 10.6963342138603, 3.88153685222973, NaN, 10.8680706911309,
3.91069514101202, NaN, 10.8778532635082, 3.96953968384966,
NaN, 10.9667280233359, 4.00885742770301, NaN, 11.0575908298141,
3.99355643777286, NaN, 11.2109310068983, 4.09530009965639,
NaN, 11.2052431462416, 4.10855156691499, NaN, 11.4367402182685,
4.14386796464725, NaN, 11.4933553970966, 4.2487904576972,
NaN, 11.7320929695578, 4.31915928588973, NaN, 11.8614741487706,
4.30428578172411, NaN, 12.023966105779, 4.36698396164074,
NaN, 12.2372739829269, 4.42745336266451, NaN, 12.2063292094639,
4.49448736011982, NaN, 12.3472878138224, 4.46298309734889,
NaN, 12.6991779892533, 4.59589936998155, NaN, 12.7648248076439,
4.691696030753, NaN, 12.9378400802612, 4.79949178695679,
NaN, 13.0320794582367, 4.80274267196655, NaN, 13.6076149940491,
4.75885391235352, 13.6394104003906, 5.07671976089478, 13.3812193870544,
14.5171988010406, 4.9033317565918, 13.9298572540283, 15.8893375396729
)), class = c("grouped_df", "tbl_df", "tbl", "data.frame"
), row.names = c(NA, -249L), groups = structure(list(age = structure(c(15,
14, 16, 22, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
96, 97, 98, 99, 103), label = "age", format.stata = "%10.0g"),
.rows = structure(list(1:3, 4:6, 7:9, 10:12, 13:15, 16:18,
19:21, 22:24, 25:27, 28:30, 31:33, 34:36, 37:39, 40:42,
43:45, 46:48, 49:51, 52:54, 55:57, 58:60, 61:63, 64:66,
67:69, 70:72, 73:75, 76:78, 79:81, 82:84, 85:87, 88:90,
91:93, 94:96, 97:99, 100:102, 103:105, 106:108, 109:111,
112:114, 115:117, 118:120, 121:123, 124:126, 127:129,
130:132, 133:135, 136:138, 139:141, 142:144, 145:147,
148:150, 151:153, 154:156, 157:159, 160:162, 163:165,
166:168, 169:171, 172:174, 175:177, 178:180, 181:183,
184:186, 187:189, 190:192, 193:195, 196:198, 199:201,
202:204, 205:207, 208:210, 211:213, 214:216, 217:219,
220:222, 223:225, 226:228, 229:231, 232:234, 235:237,
238:240, 241:242, 243:244, 245L, 246:247, 248L, 249L), ptype = integer(0), class =
c("vctrs_list_of",
"vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -86L), .drop = TRUE))
I am creating a plot like this:
I wrote the following code:
# Smooth of col1 against age plus the col2 values as points, with rows
# grouped by health status.
data3 |>
  ggplot(aes(x = age, y = col1, group = factor(healthy))) +
  geom_smooth() +
  # The dataset only has col1 and col2; `col21` was a typo for col2.
  geom_point(aes(x = age, y = col2)) +
  scale_fill_lancet() +
  scale_fill_discrete(labels = c("Healthy", "Sick", "SCI"))
The code generated the following plot.
However, I cannot assign different shapes or colors to the generated lines. How can I deal with this issue?
There are a few issues in your code. At a surface level, you should be using `scale_color_discrete`, not `scale_fill_discrete`:
# Map colour (rather than fill) to health status so that the lines and points
# are coloured per group. Column names follow the posted dataset (col1, col2).
data3 |>
  ggplot(aes(x = age, y = col1, color = factor(healthy))) +
  geom_smooth() +
  geom_point(aes(x = age, y = col2)) +
  # `healthy` is labelled "Has no health condition", so level 0 is the sick
  # group and level 1 the healthy group; the labels are listed in that order.
  # NOTE(review): the third label is presumably meant for the NA group - confirm.
  scale_color_discrete(labels = c("Sick", "Healthy", "SCI"))
Unfortunately, you have a larger problem. The color for `geom_smooth` is not linked to a value in "healthy", which means it is very difficult to make it appear in a unified legend with the other two lines. However, it is possible to trick ggplot into doing this:
data3 |>
  ggplot() +
  # Constant strings inside aes() put an "SCI" entry on the colour and
  # linetype scales, so the smooth can share a legend with the other lines.
  geom_smooth(
    aes(x = age, y = col1, color = "SCI", linetype = "SCI"),
    se = FALSE,
    show.legend = FALSE
  ) +
  geom_line(
    aes(x = age, y = col2, color = factor(healthy), linetype = factor(healthy))
  ) +
  # Values and labels are positional; the scale entries are expected in the
  # order "0", "1", "SCI". `healthy` is labelled "Has no health condition",
  # so 0 is the sick group and 1 the healthy group.
  scale_color_manual(
    values = c("#82c1e9", "#2e6d65", "#3e647d"),
    labels = c("Sick", "Healthy", "SCI"),
    na.translate = FALSE
  ) +
  # The linetype legend is suppressed; only the colour legend is drawn.
  scale_linetype_manual(
    values = c(1, 2, 1),
    labels = c("Sick", "Healthy", "SCI"),
    na.translate = FALSE,
    guide = guide_none()
  ) +
  theme_bw() +
  labs(x = "Age", y = "Visits to Healthcare Provider", color = NULL)
But that is a strange way to use ggplot, and somewhat difficult to follow. You have to specify the literal string "SCI" as an aesthetic in `geom_smooth`, repeat aesthetic definitions elsewhere, and suppress a lot of ggplot's default (intended) behavior.
Instead, I would precompute what I intend to plot.
First, compute the smooth:
# Fit a loess smooth of col1 (the SCI series in the posted dataset) on age.
data3.loess <- loess(col1 ~ age, data = data3)
Then we'll create `data4`, which aligns all the values for the plot in one dataset:
# Build one long table holding the three series to plot: the loess smooth
# ("SCI") and the col2 values for healthy == 0 ("Sick") and healthy == 1
# ("Healthy"). Column names follow the posted dataset (col1, col2).
data4 <- data3 |>
  ungroup() |>
  # Keep only rows that carry a col2 value.
  filter(!is.na(col2)) |>
  # Evaluate the fitted smooth at each remaining age; age is the only
  # predictor in the loess formula.
  mutate(smooth = predict(data3.loess, newdata = data.frame(age = age))) |>
  select(age, healthy, col2, smooth) |>
  # One row per age with columns `0` and `1`, then back to long form so that
  # `smooth`, `0` and `1` become the values of `name`.
  pivot_wider(
    id_cols = c(age, smooth),
    names_from = healthy,
    values_from = col2
  ) |>
  pivot_longer(-age) |>
  mutate(
    label = case_when(
      name == "smooth" ~ "SCI",
      name == "0" ~ "Sick",
      name == "1" ~ "Healthy"
    ),
    # The level order fixes the legend order and the positional matching of
    # manual scale values.
    label = factor(label, c("SCI", "Sick", "Healthy"), ordered = TRUE)
  )
age name value label
<dbl> <chr> <dbl> <ord>
1 15 smooth NA SCI
2 15 0 9.74 Sick
3 15 1 3.55 Healthy
4 16 smooth 18.5 SCI
5 16 0 9.70 Sick
6 16 1 3.54 Healthy
7 17 smooth 19.3 SCI
8 17 0 9.67 Sick
9 17 1 3.51 Healthy
10 18 smooth 20.0 SCI
# … with 248 more rows
And finally, this greatly simplifies the call to ggplot:
# Draw the three precomputed series; colour and line type both follow `label`,
# whose level order is SCI, Sick, Healthy, so the manual values below are
# matched to the levels in that order.
ggplot(
  data4,
  aes(x = age, y = value, color = label, linetype = label)
) +
  geom_line() +
  scale_color_manual(
    values = c("#3e647d", "#82c1e9", "#2e6d65")
  ) +
  scale_linetype_manual(
    values = c(1, 1, 2)
  ) +
  theme_bw() +
  # NULL titles remove the legend headings for both scales.
  labs(
    x = "Age",
    y = "Visits to Healthcare Provider",
    color = NULL,
    linetype = NULL
  )