I have a very huge data table in wide-format and give you here just an short example:
library(maditr)
library(reshape2)
library(data.table)
dt.test <- data.table(time = as.Date('2009-01-01') + 0:9,
s1 = rnorm(10, 5, 1),
s2 = rnorm(10, 10, 2),
s3 = rnorm(10, 20, 4))
dt.test <- dt.test %>%
reshape2::melt(id.vars = "time", variable.name = "stock", value.name = "price") %>%
maditr::dcast(stock ~ time, fun.aggregate = list, value.var = "price")
> dt.test
# stock 2009-01-01 2009-01-02 2009-01-03 2009-01-04 2009-01-05 2009-01-06 2009-01-07 2009-01-08 2009-01-09 2009-01-10
#1: s1 5.139208 3.879785 6.099094 5.304787 4.732387 4.77054 4.877472 4.830193 5.468288 3.121548
#2: s2 11.70447 7.360979 10.5366 7.525067 9.770211 14.13924 8.356996 10.45796 9.749032 5.034887
#3: s3 28.17668 10.22208 29.17786 24.41746 17.87546 24.67163 13.13525 14.9941 18.84145 22.07169
Now I need to calculate for each row the weights, which means: each row-entry divided by the mean of the whole row.
Now two end formats are possible for me. Either the weights come in the same table or we create a new weights table dt.weights
and save the weights there.
Version 1: Saving them in the same table dt.test
Save the weight-row for s1
after row s1
, which is called s1W
, a weight-row s2W
after row s2
and a weight-row s3W
after s3
in my data table.
Version 2: Saving them in a new table dt.weights
This should look the same way (wide-format) as before, but replace the stocks by the weights:
# dt.weights
# stock_w 2009-01-01 2009-01-02 2009-01-03 2009-01-04 2009-01-05 2009-01-06 2009-01-07 2009-01-08 2009-01-09 2009-01-10
#1: s1 5.139208 3.879785 6.099094 5.304787 4.732387 4.77054 4.877472 4.830193 5.468288 3.121548
#2: s2 11.70447 7.360979 10.5366 7.525067 9.770211 14.13924 8.356996 10.45796 9.749032 5.034887
#3: s3 28.17668 10.22208 29.17786 24.41746 17.87546 24.67163 13.13525 14.9941 18.84145 22.07169
How can this work?
The wide-format of dt.test
and dt.weights
shouldn't be changed.
EDIT: Small subset of my data:
structure(list(GPNRPlan = c(630882L, 773297L, 830234L), `2018-01-01-00` = list(
101.59, 211.02, 27.91), `2018-01-01-01` = list(98.32, 209.58,
20.98), `2018-01-01-02` = list(95.51, 205.74, 14.62), `2018-01-01-03` = list(
93.28, 210.62, 13.96), `2018-01-01-04` = list(93.03, 219.44,
13.39), `2018-01-01-05` = list(101.62, 269.02, 11.08), `2018-01-01-06` = list(
114.8, 275.1, 10.94), `2018-01-01-07` = list(114.08, 285.74,
11.17), `2018-01-01-08` = list(105.71, 299.54, 10.6), `2018-01-01-09` = list(
107.51, 316.7, 10.84), `2018-01-01-10` = list(112.17, 298.06,
10.85), `2018-01-01-11` = list(120.32, 259.44, 13.08), `2018-01-01-12` = list(
119.75, 255.66, 29.68), `2018-01-01-13` = list(115.29, 284.08,
51.07), `2018-01-01-14` = list(117.77, 249.38, 50.02), `2018-01-01-15` = list(
121.11, 266.22, 51.51), `2018-01-01-16` = list(131.71, 258.14,
54.03), `2018-01-01-17` = list(147.68, 260.28, 60.09), `2018-01-01-18` = list(
157.38, 258.74, 60.94), `2018-01-01-19` = list(161.6, 227.96,
64.69), `2018-01-01-20` = list(154, 214.5, 52.98), `2018-01-01-21` = list(
139.59, 218.96, 46.26), `2018-01-01-22` = list(121.03, 214.46,
31.93), `2018-01-01-23` = list(104.83, 211.48, 21.57), `2018-01-02-00` = list(
104.33, 208.66, 18.05), `2018-01-02-01` = list(96.83, 209.98,
13.68), `2018-01-02-02` = list(94.7, 214.64, 13.49), `2018-01-02-03` = list(
93.8, 211.1, 10.61), `2018-01-02-04` = list(93.73, 223.18,
10.56), `2018-01-02-05` = list(103.18, 290.62, 10.33), `2018-01-02-06` = list(
132.27, 302.42, 11.02), `2018-01-02-07` = list(345.6, 409.74,
14.39), `2018-01-02-08` = list(564.61, 402.22, 15.35), `2018-01-02-09` = list(
619.72, 426.02, 36.77), `2018-01-02-10` = list(626.2, 393.6,
43.88), `2018-01-02-11` = list(619.47, 383.42, 41.75), `2018-01-02-12` = list(
577.24, 338.1, 47.78), `2018-01-02-13` = list(495.96, 367.32,
46.86), `2018-01-02-14` = list(494.98, 349.86, 50.1), `2018-01-02-15` = list(
538.48, 339.86, 50.87), `2018-01-02-16` = list(524.3, 315.06,
53.49), `2018-01-02-17` = list(475.18, 307, 53.45), `2018-01-02-18` = list(
366, 294.9, 55.2), `2018-01-02-19` = list(225.15, 243.6,
57.24), `2018-01-02-20` = list(182.21, 234.46, 51.21), `2018-01-02-21` = list(
160.3, 234, 33.39), `2018-01-02-22` = list(136.96, 234.78,
24.73), `2018-01-02-23` = list(117.38, 230.74, 21.19), `2018-01-03-00` = list(
105.54, 231.48, 14.04), `2018-01-03-01` = list(100.61, 237.72,
15.86), `2018-01-03-02` = list(97.85, 230.2, 13.49), `2018-01-03-03` = list(
96.75, 240.52, 10.18), `2018-01-03-04` = list(97, 235.82,
10.28), `2018-01-03-05` = list(105.85, 301.14, 9.6), `2018-01-03-06` = list(
131.66, 317.92, 11.66), `2018-01-03-07` = list(342.72, 401.08,
13.97), `2018-01-03-08` = list(563.71, 421.58, 19.96), `2018-01-03-09` = list(
617.2, 417.5, 39.21), `2018-01-03-10` = list(626.69, 388.46,
37.58), `2018-01-03-11` = list(624.09, 379.5, 41.75), `2018-01-03-12` = list(
581.89, 353.32, 46.09), `2018-01-03-13` = list(500.86, 253.34,
45.87), `2018-01-03-14` = list(498.61, 317.54, 42.89), `2018-01-03-15` = list(
539.27, 351.52, 50.02), `2018-01-03-16` = list(527.84, 320.2,
46.89), `2018-01-03-17` = list(477.11, 296.58, 47.37), `2018-01-03-18` = list(
360.48, 287.28, 49.22), `2018-01-03-19` = list(224.44, 247.82,
52.01), `2018-01-03-20` = list(183.9, 242.1, 56.8)), row.names = c(NA,
-3L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x000001ff1d5e1ef0>, sorted = "GPNRPlan")
What I get with you solution (weights for each row twice):
It is not immediate but it seems to work:
First step: Build the weights table.
#Compute row means
aux <- rowMeans(dt.test[, lapply(.SD, unlist), .SDcols = 2:ncol(dt.test)])
#Compute weights and add a column with "row names"
cbind(stock = paste0(dt.test$stock, "W"),
dt.test[, lapply(.SD, function(x) unlist(x) / aux), .SDcols = 2:ncol(dt.test)]
)
stock 2009-01-01 2009-01-02 2009-01-03 2009-01-04 2009-01-05 2009-01-06 2009-01-07 2009-01-08 2009-01-09
1: s1W 0.9517776 0.9545703 1.041051 1.191171 1.32749 0.884822 0.8491691 1.15274 0.7940578
2: s2W 1.126405 0.838604 0.9497316 1.033589 1.160884 0.8274315 1.071979 0.9675088 1.267201
3: s3W 1.235231 0.7507189 1.044507 1.244315 0.9601964 0.909451 1.101159 0.9570893 0.8158607
2009-01-10
1: 0.853152
2: 0.7566673
3: 0.9814708
And then, if you like, bind all together sorted:
aux <- rowMeans(dt.test[, lapply(.SD, unlist), .SDcols = 2:ncol(dt.test)])
rbind(dt.test,
cbind(stock = paste0(dt.test$stock, "W"),
dt.test[, lapply(.SD, function(x) unlist(x) / aux), .SDcols = 2:ncol(dt.test)]
)
)[order(as.character(stock))]
stock 2009-01-01 2009-01-02 2009-01-03 2009-01-04 2009-01-05 2009-01-06 2009-01-07 2009-01-08 2009-01-09
1: s1 4.770632 4.78463 5.2181 5.970551 6.65383 4.435028 4.256324 5.777923 3.980087
2: s1W 0.9517776 0.9545703 1.041051 1.191171 1.32749 0.884822 0.8491691 1.15274 0.7940578
3: s2 11.36418 8.460586 9.58174 10.42776 11.71203 8.347867 10.81508 9.761092 12.78465
4: s2W 1.126405 0.838604 0.9497316 1.033589 1.160884 0.8274315 1.071979 0.9675088 1.267201
5: s3 25.03878 15.21747 21.17271 25.22293 19.46369 18.43505 22.32109 19.40071 16.53793
6: s3W 1.235231 0.7507189 1.044507 1.244315 0.9601964 0.909451 1.101159 0.9570893 0.8158607
2009-01-10
1: 4.276287
2: 0.853152
3: 7.633934
4: 0.7566673
5: 19.89493
6: 0.9814708