I am working with network data and want to identify connected subnetworks. I loaded my data using
graph <- graph_from_data_frame(filtered_df, directed = FALSE)
and ploted my network
plot(graph)
E(graph)$conflict_period
[1] "72_1" "72_1" "72_1" "72_1" "72_1" "72_1" "72_1" "72_1" "72_1" "72_1" "72_1" "372_1" "372_1" "372_1"
[15] "372_1" "372_1" "372_1" "372_1" "372_1" "372_1" "372_1" "522_0" "522_0" "522_0" "522_0" "522_0" "522_0" "522_0"
[29] "522_0" "522_0" "522_0" "715_0" "715_0" "715_0" "715_0" "5390_0"
So far, the information on the subgroups is stored in the edges. A Node belonges to each subgroup it receives an edge from, so a Node can also belong to multiple subgroups.For instance Government of Mali belongs together with Civilians to subgroups "72_1" and togehther with FIAA to subgroup "372_1". I want to know if subgroups "72_1" and "372_1" are conected, which they are if at least two Nodes belonging to subgroup "72_1" are conected via edges to at least two Nodes belonging to subgroup"372_1". I tired ways outside Network analysis to identify this relationship, but failed. Now I am here asking for help.
the desired output would be a table listing the connected subgroup based on the aforementioned criterium. In this case it should be:
conflict_period | connected |
---|---|
72_1 | 372_1,522_0 |
372_1 | 72_1,522_0 |
522_0 | 372_1,72_1 |
715_0 | NA |
5390_0 | NA |
Here is the used data:
structure(list(side_a = c("Government of Mali", "Government of Mali",
"Government of Mali", "Government of Mali", "Government of Mali",
"Government of Mali", "Government of Mali", "Government of Mali",
"Government of Mali", "Government of Mali", "Government of Mali",
"Government of Mali", "Government of Mali", "Government of Mali",
"Government of Mali", "Government of Mali", "Government of Mali",
"Government of Mali", "Government of Mali", "Government of Mali",
"Government of Mali", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA",
"FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "MPGK", "MPGK", "MPGK",
"MPGK", "ARLA, FIAA, FPLA"), side_b = c("Civilians", "Civilians",
"Civilians", "Civilians", "Civilians", "Civilians", "Civilians",
"Civilians", "Civilians", "Civilians", "Civilians", "FIAA", "FIAA",
"FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA", "FIAA",
"Civilians", "Civilians", "Civilians", "Civilians", "Civilians",
"Civilians", "Civilians", "Civilians", "Civilians", "Civilians",
"Civilians", "Civilians", "Civilians", "Civilians", "MPA"), country = c("Mali",
"Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali",
"Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali",
"Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali",
"Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali", "Mali",
"Mali", "Mali", "Mali"), period_start = structure(c(765158400,
765158400, 765158400, 765158400, 765158400, 765158400, 765158400,
765158400, 765158400, 765158400, 765158400, 770256000, 770256000,
770256000, 770256000, 770256000, 770256000, 770256000, 770256000,
770256000, 770256000, 771552000, 771552000, 771552000, 771552000,
771552000, 771552000, 771552000, 771552000, 771552000, 771552000,
769910400, 769910400, 769910400, 769910400, 771206400), tzone = "UTC", class = c("POSIXct",
"POSIXt")), period_end = structure(c(817862400, 817862400, 817862400,
817862400, 817862400, 817862400, 817862400, 817862400, 817862400,
817862400, 817862400, 819158400, 819158400, 819158400, 819158400,
819158400, 819158400, 819158400, 819158400, 819158400, 819158400,
816739200, 816739200, 816739200, 816739200, 816739200, 816739200,
816739200, 816739200, 816739200, 816739200, 814492800, 814492800,
814492800, 814492800, 802828800), tzone = "UTC", class = c("POSIXct",
"POSIXt")), conflict_period = c("72_1", "72_1", "72_1", "72_1",
"72_1", "72_1", "72_1", "72_1", "72_1", "72_1", "72_1", "372_1",
"372_1", "372_1", "372_1", "372_1", "372_1", "372_1", "372_1",
"372_1", "372_1", "522_0", "522_0", "522_0", "522_0", "522_0",
"522_0", "522_0", "522_0", "522_0", "522_0", "715_0", "715_0",
"715_0", "715_0", "5390_0")), row.names = c(NA, -36L), class = c("tbl_df",
"tbl", "data.frame"))
According to your latest update, here is one option to achieve your desired output
library(igraph)
library(dplyr)
# simplify the dataframe and generate a graph
g <- df %>%
select(matches("^side|conflict")) %>%
distinct() %>%
graph_from_data_frame(directed = FALSE)
# repeat pruning the graph and retaining the subgraph(s) such that each node has the degree >= 2
gh <- g
repeat {
d <- degree(gh)
if (min(d) >= 2) break
gh <- induced_subgraph(gh, V(gh)[d >= 2])
}
# retrieve the vertices of each desired cluster
clt <- lapply(decompose(gh), \(x) E(x)$conflict_period)
# go through each conflict_period and if they are in one of the found clusters
out <- sapply(
E(g)$conflict_period,
\(p) {
lapply(clt, \(q) {
if (p %in% q) {
setdiff(q, p)
} else {
NA
}
})
}
)
# produce the desired output
res <- within(
data.frame(conflict_period = names(out)),
connected <- out
)
such that
> res
conflict_period connected
1 72_1 372_1, 522_0
2 372_1 72_1, 522_0
3 522_0 372_1, 72_1
4 715_0 NA
5 5390_0 NA