Using arules, I have got two itemsets, and I want to do subtraction between the two different itemsets when having same items.
> inspect(fsets_model_test)
items support count
[1] {SURFSKINTEMP=6,MODIS_LST=1} 0.01235235 663
[2] {TOTCO=13,MODIS_LST=1} 0.01373104 737
[3] {TOTCO=6,MODIS_LST=1} 0.01393598 748
[4] {TOTO3=15,MODIS_LST=1} 0.01265045 679
[5] {TOTH2OVAP=6,MODIS_LST=1} 0.01548236 831
[6] {TOTH2OVAP=1,MODIS_LST=1} 0.01565004 840
> inspect(fsets_nonsesmic_test)
items support count
[1] {TOTCO=6,MODIS_LST=1} 0.02192761 10013
[2] {TOTCO=13,MODIS_LST=1} 0.02261524 10327
[3] {TOTO3=15,MODIS_LST=1} 0.02432556 11108
[4] {SURFAIRTEMP=3,TOTH2OVAP=1,MODIS_LST=1} 0.01772735 8095
[5] {TOTH2OVAP=1,MODIS_LST=1} 0.02873605 13122
[6] {SURFAIRTEMP=3,TOTH2OVAP=1} 0.01856828 8479
you can see that itemsets fsets_model_test and itemsets fsets_nonsesmic_test have same items {TOTO3=15,MODIS_LST=1}
What I want to do is subtract support between two itemsets, in above case is 0.02432556 - 0.01265045 = 0.01167511, and then get a new itemsets.
How to implement this in arules, thanks
following are the example itemsets
one itemsets
fsets_model_test <- new("itemsets"
, items = new("itemMatrix"
, data = new("ngCMatrix"
, i = c(5L, 121L, 74L, 121L, 67L, 121L, 59L, 121L, 33L, 121L, 28L,
121L)
, p = c(0L, 2L, 4L, 6L, 8L, 10L, 12L)
, Dim = c(125L, 6L)
, Dimnames = list(NULL, NULL)
, factors = list()
)
, itemInfo = structure(list(labels = c("SURFSKINTEMP=1", "SURFSKINTEMP=2",
"SURFSKINTEMP=3", "SURFSKINTEMP=4", "SURFSKINTEMP=5", "SURFSKINTEMP=6",
"SURFSKINTEMP=7", "SURFSKINTEMP=8", "SURFSKINTEMP=9", "SURFSKINTEMP=10",
"SURFSKINTEMP=11", "SURFSKINTEMP=12", "SURFSKINTEMP=13", "SURFSKINTEMP=14",
"SURFSKINTEMP=15", "SURFSKINTEMP=16", "SURFAIRTEMP=1", "SURFAIRTEMP=2",
"SURFAIRTEMP=3", "SURFAIRTEMP=4", "SURFAIRTEMP=5", "SURFAIRTEMP=6",
"SURFAIRTEMP=7", "SURFAIRTEMP=8", "SURFAIRTEMP=9", "SURFAIRTEMP=10",
"SURFAIRTEMP=11", "SURFAIRTEMP=12", "TOTH2OVAP=1", "TOTH2OVAP=2",
"TOTH2OVAP=3", "TOTH2OVAP=4", "TOTH2OVAP=5", "TOTH2OVAP=6", "TOTH2OVAP=7",
"TOTH2OVAP=8", "TOTH2OVAP=9", "TOTH2OVAP=10", "TOTH2OVAP=11",
"TOTH2OVAP=12", "TOTH2OVAP=13", "TOTH2OVAP=14", "TOTH2OVAP=15",
"TOTH2OVAP=16", "TOTH2OVAP=17", "TOTO3=1", "TOTO3=2", "TOTO3=3",
"TOTO3=4", "TOTO3=5", "TOTO3=6", "TOTO3=7", "TOTO3=8", "TOTO3=9",
"TOTO3=10", "TOTO3=11", "TOTO3=12", "TOTO3=13", "TOTO3=14", "TOTO3=15",
"TOTO3=16", "TOTO3=17", "TOTCO=1", "TOTCO=2", "TOTCO=3", "TOTCO=4",
"TOTCO=5", "TOTCO=6", "TOTCO=7", "TOTCO=8", "TOTCO=9", "TOTCO=10",
"TOTCO=11", "TOTCO=12", "TOTCO=13", "TOTCO=14", "TOTCO=15", "TOTCH4=1",
"TOTCH4=2", "TOTCH4=3", "TOTCH4=4", "TOTCH4=5", "TOTCH4=6", "TOTCH4=7",
"TOTCH4=8", "TOTCH4=9", "TOTCH4=10", "TOTCH4=11", "TOTCH4=12",
"TOTCH4=13", "TOTCH4=14", "OLR_ARIS=1", "OLR_ARIS=2", "OLR_ARIS=3",
"OLR_ARIS=4", "OLR_ARIS=5", "OLR_ARIS=6", "OLR_ARIS=7", "OLR_ARIS=8",
"OLR_ARIS=9", "OLR_ARIS=10", "CLROLR_ARIS=1", "CLROLR_ARIS=2",
"CLROLR_ARIS=3", "CLROLR_ARIS=4", "CLROLR_ARIS=5", "CLROLR_ARIS=6",
"CLROLR_ARIS=7", "CLROLR_ARIS=8", "CLROLR_ARIS=9", "CLROLR_ARIS=10",
"OLR_NOAA=1", "OLR_NOAA=2", "OLR_NOAA=3", "OLR_NOAA=4", "OLR_NOAA=5",
"OLR_NOAA=6", "OLR_NOAA=7", "OLR_NOAA=8", "OLR_NOAA=9", "OLR_NOAA=10",
"MODIS_LST=1", "MODIS_LST=2", "MODIS_LST=3", "MODIS_LST=4"),
variables = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L,
2L), .Label = c("CLROLR_ARIS", "MODIS_LST", "OLR_ARIS", "OLR_NOAA",
"SURFAIRTEMP", "SURFSKINTEMP", "TOTCH4", "TOTCO", "TOTH2OVAP",
"TOTO3"), class = "factor"), levels = structure(c(1L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L,
1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 1L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 2L, 1L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 2L, 1L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 2L, 1L, 10L, 11L, 12L), .Label = c("1", "10", "11",
"12", "13", "14", "15", "16", "17", "2", "3", "4", "5", "6",
"7", "8", "9"), class = "factor")), .Names = c("labels",
"variables", "levels"), row.names = c(NA, -125L), class = "data.frame")
, itemsetInfo = structure(list(), .Names = character(0), row.names = integer(0), class = "data.frame")
)
, tidLists = NULL
, quality = structure(list(support = c(0.0123523493684093, 0.0137310429630734,
0.0139359839028207, 0.0126504452807691, 0.0154823564481872, 0.0156500353988896
), count = c(663, 737, 748, 679, 831, 840)), .Names = c("support",
"count"), row.names = c(NA, 6L), class = "data.frame")
, info = structure(list(data = model_data_tr, ntransactions = 53674L,
support = 0.01), .Names = c("data", "ntransactions", "support"
))
)
another itemsets is:
fsets_nonsesmic_test <- new("itemsets"
, items = new("itemMatrix"
, data = new("ngCMatrix"
, i = c(67L, 121L, 74L, 121L, 59L, 121L, 18L, 28L, 121L, 28L, 121L,
18L, 28L)
, p = c(0L, 2L, 4L, 6L, 9L, 11L, 13L)
, Dim = c(125L, 6L)
, Dimnames = list(NULL, NULL)
, factors = list()
)
, itemInfo = structure(list(labels = c("SURFSKINTEMP=1", "SURFSKINTEMP=2",
"SURFSKINTEMP=3", "SURFSKINTEMP=4", "SURFSKINTEMP=5", "SURFSKINTEMP=6",
"SURFSKINTEMP=7", "SURFSKINTEMP=8", "SURFSKINTEMP=9", "SURFSKINTEMP=10",
"SURFSKINTEMP=11", "SURFSKINTEMP=12", "SURFSKINTEMP=13", "SURFSKINTEMP=14",
"SURFSKINTEMP=15", "SURFSKINTEMP=16", "SURFAIRTEMP=1", "SURFAIRTEMP=2",
"SURFAIRTEMP=3", "SURFAIRTEMP=4", "SURFAIRTEMP=5", "SURFAIRTEMP=6",
"SURFAIRTEMP=7", "SURFAIRTEMP=8", "SURFAIRTEMP=9", "SURFAIRTEMP=10",
"SURFAIRTEMP=11", "SURFAIRTEMP=12", "TOTH2OVAP=1", "TOTH2OVAP=2",
"TOTH2OVAP=3", "TOTH2OVAP=4", "TOTH2OVAP=5", "TOTH2OVAP=6", "TOTH2OVAP=7",
"TOTH2OVAP=8", "TOTH2OVAP=9", "TOTH2OVAP=10", "TOTH2OVAP=11",
"TOTH2OVAP=12", "TOTH2OVAP=13", "TOTH2OVAP=14", "TOTH2OVAP=15",
"TOTH2OVAP=16", "TOTH2OVAP=17", "TOTO3=1", "TOTO3=2", "TOTO3=3",
"TOTO3=4", "TOTO3=5", "TOTO3=6", "TOTO3=7", "TOTO3=8", "TOTO3=9",
"TOTO3=10", "TOTO3=11", "TOTO3=12", "TOTO3=13", "TOTO3=14", "TOTO3=15",
"TOTO3=16", "TOTO3=17", "TOTCO=1", "TOTCO=2", "TOTCO=3", "TOTCO=4",
"TOTCO=5", "TOTCO=6", "TOTCO=7", "TOTCO=8", "TOTCO=9", "TOTCO=10",
"TOTCO=11", "TOTCO=12", "TOTCO=13", "TOTCO=14", "TOTCO=15", "TOTCH4=1",
"TOTCH4=2", "TOTCH4=3", "TOTCH4=4", "TOTCH4=5", "TOTCH4=6", "TOTCH4=7",
"TOTCH4=8", "TOTCH4=9", "TOTCH4=10", "TOTCH4=11", "TOTCH4=12",
"TOTCH4=13", "TOTCH4=14", "OLR_ARIS=1", "OLR_ARIS=2", "OLR_ARIS=3",
"OLR_ARIS=4", "OLR_ARIS=5", "OLR_ARIS=6", "OLR_ARIS=7", "OLR_ARIS=8",
"OLR_ARIS=9", "OLR_ARIS=10", "CLROLR_ARIS=1", "CLROLR_ARIS=2",
"CLROLR_ARIS=3", "CLROLR_ARIS=4", "CLROLR_ARIS=5", "CLROLR_ARIS=6",
"CLROLR_ARIS=7", "CLROLR_ARIS=8", "CLROLR_ARIS=9", "CLROLR_ARIS=10",
"OLR_NOAA=1", "OLR_NOAA=2", "OLR_NOAA=3", "OLR_NOAA=4", "OLR_NOAA=5",
"OLR_NOAA=6", "OLR_NOAA=7", "OLR_NOAA=8", "OLR_NOAA=9", "OLR_NOAA=10",
"MODIS_LST=1", "MODIS_LST=2", "MODIS_LST=3", "MODIS_LST=4"),
variables = structure(c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L,
2L), .Label = c("CLROLR_ARIS", "MODIS_LST", "OLR_ARIS", "OLR_NOAA",
"SURFAIRTEMP", "SURFSKINTEMP", "TOTCH4", "TOTCO", "TOTH2OVAP",
"TOTO3"), class = "factor"), levels = structure(c(1L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L,
1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 2L, 3L, 4L, 5L, 6L, 1L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 2L, 1L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 2L, 1L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 2L, 1L, 10L, 11L, 12L), .Label = c("1", "10", "11",
"12", "13", "14", "15", "16", "17", "2", "3", "4", "5", "6",
"7", "8", "9"), class = "factor")), .Names = c("labels",
"variables", "levels"), row.names = c(NA, -125L), class = "data.frame")
, itemsetInfo = structure(list(), .Names = character(0), row.names = integer(0), class = "data.frame")
)
, tidLists = NULL
, quality = structure(list(support = c(0.0219276058330541, 0.0226152387334415,
0.024325561329628, 0.0177273513650827, 0.0287360475123675, 0.0185682782241552
), count = c(10013, 10327, 11108, 8095, 13122, 8479)), .Names = c("support",
"count"), row.names = c(NA, 6L), class = "data.frame")
, info = structure(list(data = nonsesmic_data_tr, ntransactions = 456639L,
support = 0.01), .Names = c("data", "ntransactions", "support"
))
)
If the two sets come from transaction data that are compatible (see ? itemCoding
) then you can use match
to find matching itemsets in the two sets. After that, it should be easy to subtract the support.