I'm looking to perform a composite aggregation on the following documents:
[
{
"title": "Document 1",
"tags": ["elasticsearch", "aggregation", "elasticsearch"]
},
{
"title": "Document 2",
"tags": ["elasticsearch", "search", "search"]
},
{
"title": "Document 3",
"tags": ["aggregation", "search"]
}
]
When running this aggregation:
{
"size": 0,
"aggs": {
"tags_count": {
"terms": {
"field": "tags.keyword"
}
}
}
}
I am expecting each value to be counted individually regardless of whether or not it's duplicated, like so:
{
"aggregations": {
"tags_count": {
"buckets": [
{ "key": "elasticsearch", "doc_count": 3 },
{ "key": "search", "doc_count": 3 },
{ "key": "aggregation", "doc_count": 2 }
]
}
}
}
However, I actually get this:
{
"aggregations": {
"tags_count": {
"buckets": [
{ "key": "elasticsearch", "doc_count": 2 },
{ "key": "search", "doc_count": 2 },
{ "key": "aggregation", "doc_count": 2 }
]
}
}
}
Is there a way to achieve my expected behaviour?
I don't think you will be able to bend the classic bucket aggregations to your needs, because they count documents rather than individual values; you will need a scripted metric aggregation instead.
To build and test this script, I created the following dataset:
POST 79469094/_bulk
{"index": {}}
{"title": "Document 1","tags": ["elasticsearch","aggregation","elasticsearch"]}
{"index": {}}
{"title": "Document 2","tags": ["elasticsearch","search","search"]}
{"index": {}}
{"title": "Document 3","tags": ["aggregation","search"]}
Using a script aggregation like the following should do the trick.
# Counts every occurrence of each tag, including repeats inside a single document.
# The result is a single map of tag -> occurrence count under aggregations.tags_count.value.
GET 79469094/_search
{
  "size": 0,
  "aggs": {
    "tags_count": {
      "scripted_metric": {
        "init_script": "state.counts = [:]",
        "map_script": """
          // Read the tags from _source so that repeated values inside one document are kept.
          def tags = params._source.tags;
          // Documents without a tags field contribute nothing instead of failing the script.
          if (tags != null) {
            // A document may carry a single tag as a bare value rather than an array.
            if (!(tags instanceof List)) {
              tags = [tags];
            }
            for (tag in tags) {
              // Skip explicit nulls inside the array; they are not countable tags.
              if (tag == null) {
                continue;
              }
              state.counts[tag] = state.counts.containsKey(tag) ? state.counts[tag] + 1 : 1;
            }
          }
        """,
        "combine_script": "return state.counts",
        "reduce_script": """
          // Merge the per-shard maps into one map of tag -> total occurrences.
          Map finalCounts = [:];
          for (shardCounts in states) {
            // A shard that collected no documents can report a null state.
            if (shardCounts == null) {
              continue;
            }
            for (entry in shardCounts.entrySet()) {
              finalCounts[entry.getKey()] = finalCounts.containsKey(entry.getKey()) ?
                finalCounts[entry.getKey()] + entry.getValue() : entry.getValue();
            }
          }
          return finalCounts;
        """
      }
    }
  }
}
{
"took": 30,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": null,
"hits": []
},
"aggregations": {
"tags_count": {
"value": {
"search": 3,
"elasticsearch": 3,
"aggregation": 2
}
}
}
}