or-toolsconstraint-programmingcp-sat

How can we assign the tasks of the same type into the task packages as much as possible with ortools cp-sat?


In labs, tasks can be assigned into task packages and then the packages are assigned to lab analysts.

Typically we want to:

  1. Minimize the number of packages formed
  2. Try our best to put the tasks of the same type into one package

A diagram to illustrate the idea of package forming

In the following sample code, I can minimize the number of packages formed. But regarding putting tasks of the same type into one package, I try to use distances to realize this. But how can we compute the distances within all packages and add them up.

from ortools.sat.python import cp_model
import pandas as pd

model = cp_model.CpModel()


# 1. Data

tasks = {'Task A1', 'Task A2', 'Task B1', 'Task B2'}

packages = {'Package 1', 'Package 2', 'Package 3'}

max_package_size = 2

distances = {
    ('Task A1', 'Task A2'): 0,
    ('Task A1', 'Task B1'): 1,
    ('Task A1', 'Task B2'): 1,
    ('Task A2', 'Task B1'): 1,
    ('Task A2', 'Task B2'): 1,
    ('Task B1', 'Task B2'): 0,
}


# 2. Decision Variables

var_task_to_package_matrix = {
    (task, package): model.NewBoolVar(f"task {task} --> group {package}")
    for task in tasks
    for package in packages
}

var_package_is_formed_indicator = {
    package: model.NewBoolVar(f"package {package} is formed")
    for package in packages
}


# 3. Constraints


for package in packages:
    # Each package can at most have two tasks
    model.add(
        sum(var_task_to_package_matrix[task, package] for task in tasks) <= max_package_size
    )

for task in tasks:
    # One task has to be in and only can be in one package
    model.add(
        sum(var_task_to_package_matrix[task, package] for package in packages) == 1
    )


for package in packages:
    # A package is formed if any task is assigned into it
    model.AddMaxEquality(
        var_package_is_formed_indicator[package],
        [var_task_to_package_matrix[task, package] for task in tasks]
    )


# 4. Objective

total_distances = 1

total_number_of_formed_packages = sum(var_package_is_formed_indicator[package] for package in packages)

model.Minimize(total_distances + total_number_of_formed_packages)


# 5. Solve

solver = cp_model.CpSolver()
status = solver.Solve(model=model)


L = []

if status == cp_model.OPTIMAL or status == cp_model.FEASIBLE:

    for task in tasks:
        for package in packages:
            tmp = {'task':task, 'package': package, 'indicator': solver.value(var_task_to_package_matrix[task, package])}
            L.append(tmp)

    df = pd.DataFrame(L)
    df = df[df['indicator']==1]
    print(df)
else:
    print('The model is infeasible or invalid')

Solution

  • We can use circuits to achieve this. The only problem is when the scale of the problems increases, the number of variables increase tremendously. Hopefully the preprocessing can alleviate this problem.

    from ortools.sat.python import cp_model
    import pandas as pd
    
    model = cp_model.CpModel()
    
    
    # 1. Data
    
    # tasks = {'Task A1', 'Task A2', 'Task B1', 'Task B2'}
    
    tasks = {i for i in range(6)}
    
    packages = {i for i in range(3)}
    
    max_package_size = 3
    
    task_to_type = {
        0: 'A',
        1: 'A',
        2: 'B',
        3: 'A',
        4: 'B',
        5: 'B',
    }
    
    
    distances = {
        (t1, t2): 0
        if task_to_type[t1] == task_to_type[t2]
        else 1
        for t1 in tasks for t2 in tasks
    }
    
    # distances = {
    #     ('Task A1', 'Task A2'): 0,
    #     ('Task A1', 'Task B1'): 1,
    #     ('Task A1', 'Task B2'): 1,
    #     ('Task A2', 'Task B1'): 1,
    #     ('Task A2', 'Task B2'): 1,
    #     ('Task B1', 'Task B2'): 0,
    # }
    
    
    # 2. Decision Variables
    
    var_task_to_package_matrix = {
        (task, package): model.NewBoolVar(f"task {task} --> group {package}")
        for task in tasks
        for package in packages
    }
    
    var_package_is_formed_indicator = {
        package: model.NewBoolVar(f"package {package} is formed")
        for package in packages
    }
    
    
    var_package_task_sequence = {
        (p, t1, t2): model.NewBoolVar(f"package {p} task {t1} --> task {t2}")
        for p in packages
        for t1 in tasks
        for t2 in tasks
    }
    
    
    # Sequence
    for p in packages:
    
        arcs = list()
    
        for from_task in tasks:
            for to_task in tasks:
    
                # arcs
                if from_task != to_task:
                    arcs.append([
                            from_task,
                            to_task,
                            var_package_task_sequence[(p, from_task, to_task)]
                    ])
    
                    # distance = m_cost[m, from_task, to_task]
    
                    # cannot require the time index of task 0 to represent the first and the last position
                    # if True: #to_task != 0:
                    #
                    #     model.Add(
                    #         variables_task_ends[from_task] + distance <= variables_task_starts[to_task]
                    #     ).OnlyEnforceIf(variables_machine_task_sequence[(m, from_task, to_task)])
    
        for task in tasks:
            arcs.append([
                task, task, var_task_to_package_matrix[(task, p)].Not()
            ])
    
        model.AddCircuit(arcs)
    
    
    
    # 3. Constraints
    
    
    for package in packages:
        # Each package has a limit of number of tasks
        model.add(
            sum(var_task_to_package_matrix[task, package] for task in tasks) <= max_package_size
        )
    
    for task in tasks:
        # One task has to be in and only can be in one package
        model.add(
            sum(var_task_to_package_matrix[task, package] for package in packages) == 1
        )
    
    
    for package in packages:
        # A package is formed if any task is assigned into it
        model.AddMaxEquality(
            var_package_is_formed_indicator[package],
            [var_task_to_package_matrix[task, package] for task in tasks]
        )
    
    
    # 4. Objective
    
    # total_distances = 1
    
    total_distances = sum(
        var_package_task_sequence[p, t1, t2]*distances[t1, t2]
        for p in packages
        for t1 in tasks
        for t2 in tasks
    )
    
    model.Minimize(total_distances)
    
    # total_number_of_formed_packages = sum(var_package_is_formed_indicator[package] for package in packages)
    # model.Minimize(total_distances + total_number_of_formed_packages)
    
    
    # 5. Solve
    
    solver = cp_model.CpSolver()
    status = solver.Solve(model=model)
    
    
    L = []
    
    if status == cp_model.OPTIMAL or status == cp_model.FEASIBLE:
    
        for task in tasks:
            for package in packages:
                tmp = {'task':task, 'package': package, 'indicator': solver.value(var_task_to_package_matrix[task, package])}
                L.append(tmp)
    
        df = pd.DataFrame(L)
        df = df[df['indicator']==1].sort_values(['task', 'package'])
        print(df)
    else:
        print('The model is infeasible or invalid')
    
    for x in var_package_task_sequence:
        if solver.value(var_package_task_sequence[x]) == 1:
            print(x, var_package_task_sequence[x], solver.value(var_package_task_sequence[x]))