I have data as follows:
{
"_index": "index",
"_id": "...",
"_score": 4.512486,
"_source": {
"division": {
"name": "Ben",
"id": "678"
},
"summary": {
"severity": 2,
"last_issue_at": "...",
"first_issue_at": "...",
"issues_count": 1
},
"company": {
"id": "..."
},
"ingest_timestamp": "..",
"asset": {
"name": "Bus",
"vin": "123",
"id": "...",
"manufacturer": ...
},
"timestamp": "..."
}
{
"_index": "index",
"_id": "...",
"_score": 4.512486,
"_source": {
"division": {
"name": "Ben",
"id": "678"
},
"summary": {
"severity": 1,
"last_issue_at": "...",
"first_issue_at": "...",
"issues_count": 1
},
"company": {
"id": "..."
},
"ingest_timestamp": "..",
"asset": {
"name": "Bus",
"vin": "123",
"id": "...",
"manufacturer": ...
},
"timestamp": "..."
}
Goal: Count unique assets with a severity of 1 or 2. If an asset has both severities, only count the one with severity 1.
Here is my query so far:
{
"query": {
"bool": {
"must": [
{
"match": {
"company.id": "..."
}
},
{
"range": {
"timestamp": {
"gte": "...",
"lte": "..."
}
}
}
]
}
},
"runtime_mappings": {
"asset_id_and_vin": {
"type": "keyword",
"script": {
"source": "if (doc.containsKey('asset.vin')) { emit(doc['asset.id'] + ' ' + doc['asset.vin']) } else { emit(doc['asset.id'].value + ' ' + 'N/A') }"
}
}
},
"aggs": {
"asset_count": {
"cardinality": {
"field": "asset_id_and_vin"
}
},
"assets": {
"multi_terms": {
"terms": [
{
"field": "asset.name"
},
{
"field": "asset.vin",
"missing": "N/A"
},
{
"field": "division.name"
},
{
"field": "asset.id"
},
{
"field": "division.id"
}
],
"order": {
"earliest_date": "desc"
},
"size": 100
},
"aggs": {
"severity": {
"min": {
"field": "summary.severity"
}
},
"earliest_date": {
"min": {
"field": "summary.first_issue_at"
}
},
"latest_date": {
"max": {
"field": "summary.last_issue_at"
}
},
"issues": {
"sum": {
"field": "summary.issues_count"
}
},
"filtered_assets": {
"bucket_selector": {
"buckets_path": {
"min_severity": "severity"
},
"script": "params.min_severity == 1 || params.min_severity == 2"
}
},
"pagination": {
"bucket_sort": {
"size": 100,
"from": 0
}
}
}
}
}
}
I added the bucket_selector
to return severities that are either 1 or 2, not both. However, this doesn't account for assets that can have both severities, as it checks one document at a time. How can I modify my query to ensure I only count the asset with severity 1 if it has both severities?
Any advice or suggestions for improving my query would be greatly appreciated. Thanks in advance!
Instead of bucket_selector, you need to use bucket_sort, it is a bit tricky since bucket sort cannot work with terms directly, so you need to add another metric aggregation underneath that would simply mirror the keys in terms (min_value
in the example below).
Unfortunately, your requirement of sorting by earliest date is not feasible in this setup. The earliest date can only be found in a pipeline aggregation, which cannot be used for composite aggs sorting. To sort by earliest date we would need to how you index the data.
DELETE test
PUT test
{
"mappings": {
"properties": {
"division": {
"properties": {
"id": {
"type": "keyword"
},
"name": {
"type": "keyword"
}
}
},
"asset": {
"properties": {
"id": {
"type": "keyword"
},
"vin": {
"type": "keyword"
},
"name": {
"type": "keyword"
}
}
},
"company": {
"properties": {
"id": {
"type": "keyword"
}
}
}
}
}
}
POST test/_bulk?refresh
{ "index": { "_id": "1" } }
{"division":{"name":"Ben","id":"678"},"summary":{"severity":1,"last_issue_at":"2016-01-01","first_issue_at":"2015-01-01","issues_count":1},"company":{"id":"..."},"ingest_timestamp":"..","asset":{"name":"Bus","vin":"123","id":"...","manufacturer":"..."},"timestamp":"..."}
{ "index": { "_id": "2" } }
{"division":{"name":"Ben","id":"678"},"summary":{"severity":2,"last_issue_at":"2024-01-01","first_issue_at":"2020-01-01","issues_count":1},"company":{"id":"..."},"ingest_timestamp":"..","asset":{"name":"Bus","vin":"123","id":"...","manufacturer":"..."},"timestamp":"..."}
POST test/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"company.id": "..."
}
}
]
}
},
"runtime_mappings": {
"asset_id_and_vin": {
"type": "keyword",
"script": {
"source": "if (doc.containsKey('asset.vin')) { emit(doc['asset.id'] + ' ' + doc['asset.vin']) } else { emit(doc['asset.id'].value + ' ' + 'N/A') }"
}
}
},
"aggs": {
"asset_count": {
"cardinality": {
"field": "asset_id_and_vin"
}
},
"assets": {
"multi_terms": {
"terms": [
{
"field": "asset.name"
},
{
"field": "asset.vin",
"missing": "N/A"
},
{
"field": "division.name"
},
{
"field": "asset.id"
},
{
"field": "division.id"
}
],
"size": 100
},
"aggs": {
"severity": {
"terms": {
"field": "summary.severity"
},
"aggs": {
"min_value": {
"min": {
"field": "summary.severity"
}
},
"earliest_date": {
"min": {
"field": "summary.first_issue_at"
}
},
"latest_date": {
"max": {
"field": "summary.last_issue_at"
}
},
"issues": {
"sum": {
"field": "summary.issues_count"
}
},
"min_severity": {
"bucket_sort": {
"sort": [{
"min_value": {
"order": "asc"
}
}],
"size": 1
}
}
}
}
}
}
}
}