I have the following data, and what I want to do is collect into a new list of dicts those entries from task_resources that match on both the tasks_id and the qos_level:
# Resource records to be filtered. Each entry carries a tasks_id and a
# qos_level; the first two entries share a tasks_id and differ only in
# qos_level ("default" vs "low").
task_resources = [
{
"resource_id": "52a58a34-6b1b-49b7-b53d-2bc4bf72c172",
"domain_name": "Nep",
"tasks_id": "702148b2-4f21-4034-af8d-d11737f9f811",
"qos_level": "default",
"cpu_cores": 1,
"memory_mb": 128,
"storage_mb": 0,
"cpu_flops": 19177,
"gpu_flops": 0,
"gpu_memory_mb": 0,
"egress_network_bandwidth_gbps": 0,
"ingress_network_bandwidth_gbps": 0,
"create_time": "2024-11-13T12:13:52.192657"
},
{
"resource_id": "8208915d-90b4-4a6c-a98c-e9eb965b4e6b",
"domain_name": "Nep",
"tasks_id": "702148b2-4f21-4034-af8d-d11737f9f811",
"qos_level": "low",
"cpu_cores": 1,
"memory_mb": 128,
"storage_mb": 0,
"cpu_flops": 19177,
"gpu_flops": 0,
"gpu_memory_mb": 0,
"egress_network_bandwidth_gbps": 0,
"ingress_network_bandwidth_gbps": 0,
"create_time": "2024-11-13T12:13:52.202177"
},
{
"resource_id": "c34a4411-20d3-42eb-9f74-1336ab6ab024",
"domain_name": "Nep",
"tasks_id": "95bc75a6-b292-484d-a65d-86fae2cb6b6f",
"qos_level": "default",
"cpu_cores": 1,
"memory_mb": 128,
"storage_mb": 0,
"cpu_flops": 19177,
"gpu_flops": 0,
"gpu_memory_mb": 0,
"egress_network_bandwidth_gbps": 0,
"ingress_network_bandwidth_gbps": 0,
"create_time": "2024-11-13T12:13:52.197433"
}
]
# Presumably the requested tasks: each names a task and a qos_level.
# Both entries use the same qos_level ('default').
req_tasks = [
{'task_name': 'iperf-server', 'qos_level': 'default'},
{'task_name': 'iperf-client', 'qos_level': 'default'}
]
# Task records linking a tasks_id to a package_name; the package_name values
# here are the same strings used as task_name in req_tasks.
rel_tasks = [
{
'tasks_id': '702148b2-4f21-4034-af8d-d11737f9f811',
'domain_name': 'Nep',
'package_name': 'iperf-client',
'composite_key': 'Nep-iperf-client',
'version': 'v1',
'package_type': 'GENERIC',
'interoperability_tags': 'null',
'blacklisted_tags': 'null',
'task_create_time': '2024-11-13T12:13:52.190297'
},
{
'tasks_id': '95bc75a6-b292-484d-a65d-86fae2cb6b6f',
'domain_name': 'Nep',
'package_name': 'iperf-server',
'composite_key': 'Nep-iperf-server',
'version': 'v1',
'package_type': 'GENERIC',
'interoperability_tags': 'null',
'blacklisted_tags': 'null',
'task_create_time': '2024-11-13T12:13:52.195040'
}]
and I have this code:
# Collect the tasks_id of every rel_tasks entry whose package_name equals the
# task_name of some req_tasks entry.
task_ids = [
d["tasks_id"]
for d in rel_tasks
for t in req_tasks
if d["package_name"] == t["task_name"]
]
# NOTE: the outer loop runs once per req_tasks entry, and the filter compares
# qos_level only against that entry (tasks_id is tested against the whole
# task_ids list). A resource is therefore emitted once for every request that
# shares its qos_level -- with two 'default' requests, each match appears twice.
resources = [
d
for t in req_tasks
for d in task_resources
if d["tasks_id"] in task_ids and d["qos_level"] == t["qos_level"]
]
print(resources)
This returns:
[
{
"resource_id": "52a58a34-6b1b-49b7-b53d-2bc4bf72c172",
"domain_name": "Nep",
"tasks_id": "702148b2-4f21-4034-af8d-d11737f9f811",
"qos_level": "default",
"cpu_cores": 1,
"memory_mb": 128,
"storage_mb": 0,
"cpu_flops": 19177,
"gpu_flops": 0,
"gpu_memory_mb": 0,
"egress_network_bandwidth_gbps": 0,
"ingress_network_bandwidth_gbps": 0,
"create_time": "2024-11-13T12:13:52.192657",
},
{
"resource_id": "c34a4411-20d3-42eb-9f74-1336ab6ab024",
"domain_name": "Nep",
"tasks_id": "95bc75a6-b292-484d-a65d-86fae2cb6b6f",
"qos_level": "default",
"cpu_cores": 1,
"memory_mb": 128,
"storage_mb": 0,
"cpu_flops": 19177,
"gpu_flops": 0,
"gpu_memory_mb": 0,
"egress_network_bandwidth_gbps": 0,
"ingress_network_bandwidth_gbps": 0,
"create_time": "2024-11-13T12:13:52.197433",
},
{
"resource_id": "52a58a34-6b1b-49b7-b53d-2bc4bf72c172",
"domain_name": "Nep",
"tasks_id": "702148b2-4f21-4034-af8d-d11737f9f811",
"qos_level": "default",
"cpu_cores": 1,
"memory_mb": 128,
"storage_mb": 0,
"cpu_flops": 19177,
"gpu_flops": 0,
"gpu_memory_mb": 0,
"egress_network_bandwidth_gbps": 0,
"ingress_network_bandwidth_gbps": 0,
"create_time": "2024-11-13T12:13:52.192657",
},
{
"resource_id": "c34a4411-20d3-42eb-9f74-1336ab6ab024",
"domain_name": "Nep",
"tasks_id": "95bc75a6-b292-484d-a65d-86fae2cb6b6f",
"qos_level": "default",
"cpu_cores": 1,
"memory_mb": 128,
"storage_mb": 0,
"cpu_flops": 19177,
"gpu_flops": 0,
"gpu_memory_mb": 0,
"egress_network_bandwidth_gbps": 0,
"ingress_network_bandwidth_gbps": 0,
"create_time": "2024-11-13T12:13:52.197433",
},
]
This returns the first two dicts twice. Is there a way to change my list comprehension, or deduplicate the result, so I get this:
[
{
"resource_id": "52a58a34-6b1b-49b7-b53d-2bc4bf72c172",
"domain_name": "Nep",
"tasks_id": "702148b2-4f21-4034-af8d-d11737f9f811",
"qos_level": "default",
"cpu_cores": 1,
"memory_mb": 128,
"storage_mb": 0,
"cpu_flops": 19177,
"gpu_flops": 0,
"gpu_memory_mb": 0,
"egress_network_bandwidth_gbps": 0,
"ingress_network_bandwidth_gbps": 0,
"create_time": "2024-11-13T12:13:52.192657",
},
{
"resource_id": "c34a4411-20d3-42eb-9f74-1336ab6ab024",
"domain_name": "Nep",
"tasks_id": "95bc75a6-b292-484d-a65d-86fae2cb6b6f",
"qos_level": "default",
"cpu_cores": 1,
"memory_mb": 128,
"storage_mb": 0,
"cpu_flops": 19177,
"gpu_flops": 0,
"gpu_memory_mb": 0,
"egress_network_bandwidth_gbps": 0,
"ingress_network_bandwidth_gbps": 0,
"create_time": "2024-11-13T12:13:52.197433",
},
]
You can iterate over task_resources just once and perform the check in the if
clause of the list comprehension, testing the relational constraints in a nested generator expression passed to any
(so that you do not generate the cross-product of the main list with the other lists):
# Walk task_resources exactly once, so each resource can be kept at most once.
resources = [
    resource
    for resource in task_resources
    # Keep the resource when some request at its qos_level names the package
    # of a task with its tasks_id; any() stops at the first such pair.
    if any(
        task["package_name"] == request["task_name"]
        for request in req_tasks
        if request["qos_level"] == resource["qos_level"]
        for task in rel_tasks
        if task["tasks_id"] == resource["tasks_id"]
    )
]
print(resources)
Which outputs:
[{'cpu_cores': 1,
'cpu_flops': 19177,
'create_time': '2024-11-13T12:13:52.192657',
'domain_name': 'Nep',
'egress_network_bandwidth_gbps': 0,
'gpu_flops': 0,
'gpu_memory_mb': 0,
'ingress_network_bandwidth_gbps': 0,
'memory_mb': 128,
'qos_level': 'default',
'resource_id': '52a58a34-6b1b-49b7-b53d-2bc4bf72c172',
'storage_mb': 0,
'tasks_id': '702148b2-4f21-4034-af8d-d11737f9f811'},
{'cpu_cores': 1,
'cpu_flops': 19177,
'create_time': '2024-11-13T12:13:52.197433',
'domain_name': 'Nep',
'egress_network_bandwidth_gbps': 0,
'gpu_flops': 0,
'gpu_memory_mb': 0,
'ingress_network_bandwidth_gbps': 0,
'memory_mb': 128,
'qos_level': 'default',
'resource_id': 'c34a4411-20d3-42eb-9f74-1336ab6ab024',
'storage_mb': 0,
'tasks_id': '95bc75a6-b292-484d-a65d-86fae2cb6b6f'}]