Tags: python, python-3.x, list, list-comprehension

Python list comprehension with two loops and two conditions


I have the following data, and what I want to do is collect into a new list those dicts from task_resources that match on both the tasks_id and the qos_level:

# Sample resource records; each one references a task through "tasks_id"
# and has a "qos_level".
task_resources = [
    {
        "resource_id": "52a58a34-6b1b-49b7-b53d-2bc4bf72c172",
        "domain_name": "Nep",
        "tasks_id": "702148b2-4f21-4034-af8d-d11737f9f811",
        "qos_level": "default",
        "cpu_cores": 1,
        "memory_mb": 128,
        "storage_mb": 0,
        "cpu_flops": 19177,
        "gpu_flops": 0,
        "gpu_memory_mb": 0,
        "egress_network_bandwidth_gbps": 0,
        "ingress_network_bandwidth_gbps": 0,
        "create_time": "2024-11-13T12:13:52.192657",
    },
    {
        "resource_id": "8208915d-90b4-4a6c-a98c-e9eb965b4e6b",
        "domain_name": "Nep",
        "tasks_id": "702148b2-4f21-4034-af8d-d11737f9f811",
        "qos_level": "low",
        "cpu_cores": 1,
        "memory_mb": 128,
        "storage_mb": 0,
        "cpu_flops": 19177,
        "gpu_flops": 0,
        "gpu_memory_mb": 0,
        "egress_network_bandwidth_gbps": 0,
        "ingress_network_bandwidth_gbps": 0,
        "create_time": "2024-11-13T12:13:52.202177",
    },
    {
        "resource_id": "c34a4411-20d3-42eb-9f74-1336ab6ab024",
        "domain_name": "Nep",
        "tasks_id": "95bc75a6-b292-484d-a65d-86fae2cb6b6f",
        "qos_level": "default",
        "cpu_cores": 1,
        "memory_mb": 128,
        "storage_mb": 0,
        "cpu_flops": 19177,
        "gpu_flops": 0,
        "gpu_memory_mb": 0,
        "egress_network_bandwidth_gbps": 0,
        "ingress_network_bandwidth_gbps": 0,
        "create_time": "2024-11-13T12:13:52.197433",
    },
]

# Requested tasks: a task name paired with the QoS level asked for.
req_tasks = [
    {"task_name": "iperf-server", "qos_level": "default"},
    {"task_name": "iperf-client", "qos_level": "default"},
]

# Known task records; "package_name" is the field compared against a
# request's "task_name", and "tasks_id" is what resources refer to.
rel_tasks = [
    {
        "tasks_id": "702148b2-4f21-4034-af8d-d11737f9f811",
        "domain_name": "Nep",
        "package_name": "iperf-client",
        "composite_key": "Nep-iperf-client",
        "version": "v1",
        "package_type": "GENERIC",
        "interoperability_tags": "null",
        "blacklisted_tags": "null",
        "task_create_time": "2024-11-13T12:13:52.190297",
    },
    {
        "tasks_id": "95bc75a6-b292-484d-a65d-86fae2cb6b6f",
        "domain_name": "Nep",
        "package_name": "iperf-server",
        "composite_key": "Nep-iperf-server",
        "version": "v1",
        "package_type": "GENERIC",
        "interoperability_tags": "null",
        "blacklisted_tags": "null",
        "task_create_time": "2024-11-13T12:13:52.195040",
    },
]

and I have this code:

# IDs of the known tasks whose package name was requested
# (one entry per matching task/request pair).
task_ids = [
    task["tasks_id"]
    for task in rel_tasks
    for request in req_tasks
    if task["package_name"] == request["task_name"]
]

# Pairs every request with every resource: a resource is emitted once for
# each request whose qos_level it shares, provided its task is in task_ids.
resources = [
    resource
    for request in req_tasks
    for resource in task_resources
    if resource["tasks_id"] in task_ids
    and resource["qos_level"] == request["qos_level"]
]
print(resources)

This returns:

[
    {
        "resource_id": "52a58a34-6b1b-49b7-b53d-2bc4bf72c172",
        "domain_name": "Nep",
        "tasks_id": "702148b2-4f21-4034-af8d-d11737f9f811",
        "qos_level": "default",
        "cpu_cores": 1,
        "memory_mb": 128,
        "storage_mb": 0,
        "cpu_flops": 19177,
        "gpu_flops": 0,
        "gpu_memory_mb": 0,
        "egress_network_bandwidth_gbps": 0,
        "ingress_network_bandwidth_gbps": 0,
        "create_time": "2024-11-13T12:13:52.192657",
    },
    {
        "resource_id": "c34a4411-20d3-42eb-9f74-1336ab6ab024",
        "domain_name": "Nep",
        "tasks_id": "95bc75a6-b292-484d-a65d-86fae2cb6b6f",
        "qos_level": "default",
        "cpu_cores": 1,
        "memory_mb": 128,
        "storage_mb": 0,
        "cpu_flops": 19177,
        "gpu_flops": 0,
        "gpu_memory_mb": 0,
        "egress_network_bandwidth_gbps": 0,
        "ingress_network_bandwidth_gbps": 0,
        "create_time": "2024-11-13T12:13:52.197433",
    },
    {
        "resource_id": "52a58a34-6b1b-49b7-b53d-2bc4bf72c172",
        "domain_name": "Nep",
        "tasks_id": "702148b2-4f21-4034-af8d-d11737f9f811",
        "qos_level": "default",
        "cpu_cores": 1,
        "memory_mb": 128,
        "storage_mb": 0,
        "cpu_flops": 19177,
        "gpu_flops": 0,
        "gpu_memory_mb": 0,
        "egress_network_bandwidth_gbps": 0,
        "ingress_network_bandwidth_gbps": 0,
        "create_time": "2024-11-13T12:13:52.192657",
    },
    {
        "resource_id": "c34a4411-20d3-42eb-9f74-1336ab6ab024",
        "domain_name": "Nep",
        "tasks_id": "95bc75a6-b292-484d-a65d-86fae2cb6b6f",
        "qos_level": "default",
        "cpu_cores": 1,
        "memory_mb": 128,
        "storage_mb": 0,
        "cpu_flops": 19177,
        "gpu_flops": 0,
        "gpu_memory_mb": 0,
        "egress_network_bandwidth_gbps": 0,
        "ingress_network_bandwidth_gbps": 0,
        "create_time": "2024-11-13T12:13:52.197433",
    },
]

The first two dicts in the result are duplicated. Is there a way to change my list comprehension, or deduplicate the result, so that I get this instead:

[
    {
        "resource_id": "52a58a34-6b1b-49b7-b53d-2bc4bf72c172",
        "domain_name": "Nep",
        "tasks_id": "702148b2-4f21-4034-af8d-d11737f9f811",
        "qos_level": "default",
        "cpu_cores": 1,
        "memory_mb": 128,
        "storage_mb": 0,
        "cpu_flops": 19177,
        "gpu_flops": 0,
        "gpu_memory_mb": 0,
        "egress_network_bandwidth_gbps": 0,
        "ingress_network_bandwidth_gbps": 0,
        "create_time": "2024-11-13T12:13:52.192657",
    },
    {
        "resource_id": "c34a4411-20d3-42eb-9f74-1336ab6ab024",
        "domain_name": "Nep",
        "tasks_id": "95bc75a6-b292-484d-a65d-86fae2cb6b6f",
        "qos_level": "default",
        "cpu_cores": 1,
        "memory_mb": 128,
        "storage_mb": 0,
        "cpu_flops": 19177,
        "gpu_flops": 0,
        "gpu_memory_mb": 0,
        "egress_network_bandwidth_gbps": 0,
        "ingress_network_bandwidth_gbps": 0,
        "create_time": "2024-11-13T12:13:52.197433",
    },
]

Solution

  • Iterate over task_resources only once and move the matching logic into the if clause of the comprehension: a nested generator expression passed to any() checks the relational constraints against req_tasks and rel_tasks, so you never generate the cross-product of the main list with the other lists (which is what produces the duplicates):

    # Keep a resource when at least one request with the same qos_level
    # names the package of the task that the resource belongs to.
    resources = [
        resource
        for resource in task_resources
        if any(
            request["task_name"] == task["package_name"]
            # requests asking for this resource's QoS level
            for request in req_tasks
            if request["qos_level"] == resource["qos_level"]
            # task records this resource refers to
            for task in rel_tasks
            if task["tasks_id"] == resource["tasks_id"]
        )
    ]
    print(resources)
    

    Which outputs (shown pretty-printed, e.g. via pprint, for readability):

    [{'cpu_cores': 1,
      'cpu_flops': 19177,
      'create_time': '2024-11-13T12:13:52.192657',
      'domain_name': 'Nep',
      'egress_network_bandwidth_gbps': 0,
      'gpu_flops': 0,
      'gpu_memory_mb': 0,
      'ingress_network_bandwidth_gbps': 0,
      'memory_mb': 128,
      'qos_level': 'default',
      'resource_id': '52a58a34-6b1b-49b7-b53d-2bc4bf72c172',
      'storage_mb': 0,
      'tasks_id': '702148b2-4f21-4034-af8d-d11737f9f811'},
     {'cpu_cores': 1,
      'cpu_flops': 19177,
      'create_time': '2024-11-13T12:13:52.197433',
      'domain_name': 'Nep',
      'egress_network_bandwidth_gbps': 0,
      'gpu_flops': 0,
      'gpu_memory_mb': 0,
      'ingress_network_bandwidth_gbps': 0,
      'memory_mb': 128,
      'qos_level': 'default',
      'resource_id': 'c34a4411-20d3-42eb-9f74-1336ab6ab024',
      'storage_mb': 0,
      'tasks_id': '95bc75a6-b292-484d-a65d-86fae2cb6b6f'}]