I have a list of animals with their parents, that is used to calculate the relationship between them.
I am using the Rsymphony
R package to generate matings where each female is crosses only once and each male is crossed twice. See bellow a reproducible example.
library("data.table")
library("Rsymphony")
Pedig = data.table(
Indiv = as.character(c( 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)),
Sire = as.character(c(NA, NA, NA, 2, 1, 1, NA, 2, 2, 3, 3, 1)),
Dam = as.character(c(NA, NA, NA, NA, NA, 5, 5, 6, 7, 8, 10, 9)),
Sex = as.character(c( 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2))
)
Pedig[, Sex := data.table::fifelse(Sex =="1", "male", "female")]
pKin <- matrix(
c(
0.500000, 0.0000, 0.000, 0.00000, 0.250000, 0.375000, 0.1250000, 0.1875000, 0.06250000, 0.09375000, 0.04687500, 0.28125000,
0.000000, 0.5000, 0.000, 0.25000, 0.000000, 0.000000, 0.0000000, 0.2500000, 0.25000000, 0.12500000, 0.06250000, 0.12500000,
0.000000, 0.0000, 0.500, 0.00000, 0.000000, 0.000000, 0.0000000, 0.0000000, 0.00000000, 0.25000000, 0.37500000, 0.00000000,
0.000000, 0.2500, 0.000, 0.50000, 0.000000, 0.000000, 0.0000000, 0.1250000, 0.12500000, 0.06250000, 0.03125000, 0.06250000,
0.250000, 0.0000, 0.000, 0.00000, 0.500000, 0.375000, 0.2500000, 0.1875000, 0.12500000, 0.09375000, 0.04687500, 0.18750000,
0.375000, 0.0000, 0.000, 0.00000, 0.375000, 0.625000, 0.1875000, 0.3125000, 0.09375000, 0.15625000, 0.07812500, 0.23437500,
0.125000, 0.0000, 0.000, 0.00000, 0.250000, 0.187500, 0.5000000, 0.0937500, 0.25000000, 0.04687500, 0.02343750, 0.18750000,
0.187500, 0.2500, 0.000, 0.12500, 0.187500, 0.312500, 0.0937500, 0.5000000, 0.17187500, 0.25000000, 0.12500000, 0.17968750,
0.062500, 0.2500, 0.000, 0.12500, 0.125000, 0.093750, 0.2500000, 0.1718750, 0.50000000, 0.08593750, 0.04296875, 0.28125000,
0.093750, 0.1250, 0.250, 0.06250, 0.093750, 0.156250, 0.0468750, 0.2500000, 0.08593750, 0.50000000, 0.37500000, 0.08984375,
0.046875, 0.0625, 0.375, 0.03125, 0.046875, 0.078125, 0.0234375, 0.1250000, 0.04296875, 0.37500000, 0.62500000, 0.04492188,
0.281250, 0.1250, 0.000, 0.06250, 0.187500, 0.234375, 0.1875000, 0.1796875, 0.28125000, 0.08984375, 0.04492188, 0.53125000),
nrow = 12
)
rownames(pKin) <- colnames(pKin) <- Pedig[["Indiv"]]
Kin <- pKin[Pedig[Sex == "female"][["Indiv"]], Pedig[Sex == "male"][["Indiv"]]]
Zeros <- 0*Kin
rhsM <- rep(nrow(Kin)/ncol(Kin), ncol(Kin))
ConM <- NULL
for(k in 1:length(rhsM)){
Con <- Zeros
Con[, k] <- 1
ConM <- rbind(ConM, c(Con))
}
rhsF <- rep(1, nrow(Kin))
ConF <- NULL
for(k in 1:length(rhsF)){
Con <- Zeros
Con[k, ] <- 1
ConF <- rbind(ConF, c(Con))
}
A <- rbind(ConF, ConM)
RHS <- c(rhsF, rhsM)
ub.n <- 1
Bounds <- list(upper=list(ind=1:(length(rhsM)*length(rhsF)), val=rep(ub.n, length(rhsM)*length(rhsF))))
Dir <- rep("==", length(RHS))
# Rsymphony_solve_LP
res <- Rsymphony::Rsymphony_solve_LP(
obj = c(Kin),
mat = A,
dir = Dir,
rhs = RHS,
bounds = Bounds,
types = "I",
max = FALSE
)
Solution <- matrix(res$solution, nrow=nrow(Zeros), ncol=ncol(Zeros))
Solution <- as.data.table(Solution)
colnames(Solution) <- colnames(Kin)
Solution[, ID1 := rownames(Kin)]
Solution <- melt(Solution, id.vars="ID1", variable.name="ID2", value.name="n", variable.factor = FALSE)
Solution <- Solution[n>0]
Solution[, Value := pKin[ID2, ID1], by = .I]
The solution to this simple example is shown below.
--------------------------------------
ID1 ID2 n Value
--------------------------------------
1: 9 1 1 0.062500
2: 11 1 1 0.046875
3: 5 2 1 0.000000
4: 7 2 1 0.000000
5: 8 3 1 0.000000
6: 12 3 1 0.000000
7: 6 4 1 0.000000
8: 10 4 1 0.062500
--------------------------------------
I am looking for a way to minimize the relationship of a Group
instead of each pair of mating, in a way that the desired solution looks like the following table. In this case, I would like to provide the number of Groups (2
) and the algorithm would create 2
groups with equal number of males
and females
. In this case, each group would have 2 males and 4 females.
------------------------------------------
ID1 ID2 Value Group
------------------------------------------
1: 9,11,5,7 1,2 0.027343 A
------------------------------------------
2: 8,12,6,10 3,4 0.015625 B
------------------------------------------
Can anyone help me with this problem?
Thank you.
Here's a solution using the ompr
package for model formulation.
You want to minimize the relationships within a group. I believe this can be achieved by minimizing the difference between groups. Therefore, to achieve your desired result, I am minimizing the mating pair relationship values and the difference in sum of relationship values between groups.
See code comments below for model explanations. Note that I have redefined the male and female IDs to increment from 1 to keep the code simple. I am using glpk solver but you can use any you want.
library(dplyr)
library(ROI)
library(ompr)
library(ompr.roi)
# library(ROI.plugin.symphony)
library(ROI.plugin.glpk)
n_males <- 4
n_females <- 8
n_groups <- 2
males_per_group <- n_males / n_groups
# for 4 males and 8 females, pKin matrix should be 4x8
# assuming rows 1:4 represent the 4 males and columns 5:12 represent the 8 females
pKin <- pKin[1:4, 5:12]
pKin_df <- data.frame(
m = c(t(row(pKin))),
f = c(t(col(pKin))),
pKin_value = c(t(pKin))
)
# model variables
# x[m, f, g] = 1 if male m and female f are paired and assigned to group g else 0
# y[m, g] = 1 if male m is assigned to group g else 0
# slack[g] = pKin difference between group 1 and other groups
# max_slack = max pKin difference between group 1 and other groups
model <- MIPModel() %>%
add_variable(x[m, f, g], m = 1:n_males, f = 1:n_females, g = 1:n_groups, type = "binary") %>%
add_variable(y[m, g], m = 1:n_males, g = 1:n_groups, type = "binary") %>%
add_variable(slack[g], g = 1:n_groups, type = "continuous", lb = 0) %>%
add_variable(max_slack, type = 'continuous', lb = 0) %>%
add_constraint(
# males can/must mate with 2 females
sum_over(x[m, f, g], f = 1:n_females, g = 1:n_groups) == 2, m = 1:n_males
) %>%
add_constraint(
# females can/must mate with only 1 male
sum_over(x[m, f, g], m = 1:n_males, g = 1:n_groups) == 1, f = 1:n_females
) %>%
add_constraint(
# m-f pair can belong to a group only if m assigned to that group
x[m, f, g] <= y[m, g], m = 1:n_males, f = 1:n_females, g = 1:n_groups
) %>%
add_constraint(
# each male can belong to only 1 group
sum_over(y[m, g], g = 1:n_groups) == 1, m = 1:n_males
) %>%
add_constraint(
# max males per group
sum_over(y[m, g], m = 1:n_males) == males_per_group, g = 1:n_groups
) %>%
add_constraint(
# minimize relationships within group
sum_over(x[m, f, 1] * pKin[m, f], m = 1:n_males, f = 1:n_females) + slack[g] == sum_over(x[m, f, g] * pKin[m, f], m = 1:n_males, f = 1:n_females),
g = 2:n_groups
) %>%
add_constraint(
# get max slack[g]
slack[g] <= max_slack, g = 1:n_groups
) %>%
set_objective(
sum_over(x[m, f, g] * pKin[m, f], m = 1:n_males, f = 1:n_females, g = 1:n_groups) + max_slack,
sense = "min"
)
result <- model %>%
solve_model(with_ROI(solver = "glpk"))
result %>%
get_solution(x[m, f, g]) %>%
filter(value == 1) %>%
select(-value) %>%
arrange(m) %>%
left_join(pKin_df, by = c('m', 'f')) %>%
group_by(group = g) %>%
summarise(
males = toString(unique(m)),
females = toString(unique(f)),
pKin_value = mean(pKin_value)
)
# A tibble: 2 × 4
group males females pKin_value
<int> <chr> <chr> <dbl>
1 1 3, 4 4, 8, 1, 6 0.0156
2 2 1, 2 5, 7, 2, 3 0.0273