elasticsearchbuckets

Elasticsearch aggregation queries


I am working on a project concerning cooking recipes. I installed Elasticsearch 1.5.2 and I added a lot of products such as vegetables or meats in many indexes of supermarkets. All was well until I started aggregations queries. For example: to make a mash potatoes I need potatoes, beans, chickpeas, broccoli, milk, pepper, salt. I have all this products stored. I need to make one query to search the cheapest of this products in all indexes. I tried many queries but I didn't find what I need.

This is the example, all these queries works but I need their results in one query:

 POST /_search
    {
        "query": {
            "query_string": {
               "query": "pommes de terre",
               "fields": [
                  "titre"
               ]
            }
        },
       "sort" : [
          {"prix en €/kg" : {"order" : "asc"}}
       ]
    }

    POST /_search
    {
        "query": {
            "query_string": {
               "query": "haricots",
               "fields": [
                  "titre"
               ]
            }
        },
       "sort" : [
          {"prix en €/kg" : {"order" : "asc"}}
       ]
    }

    POST /_search
    {
        "query": {
            "query_string": {
               "query": "pois chiche",
               "fields": [
                  "titre"
               ]
            }
        },
       "sort" : [
          {"prix en €/kg" : {"order" : "asc"}}
       ]
    }

    POST /_search
    {
        "query": {
            "query_string": {
               "query": "brocoli",
               "fields": [
                  "titre"
               ]
            }
        },
       "sort" : [
          {"prix en €/kg" : {"order" : "asc"}}
       ]
    }

    POST /_search
    {
        "query": {
            "query_string": {
               "query": "lait",
               "fields": [
                  "tags"
               ]
            }
        },
       "sort" : [
          {"prix en €/L" : {"order" : "asc"}}
       ]
    }

    POST /_search
    {
        "query": {
            "query_string": {
               "query": "poivre",
               "fields": [
                  "tags"
               ]
            }
        },
       "sort" : [
          {"prix en €/kg" : {"order" : "asc"}}
       ]
    }

    POST /_search
    {
        "query": {
            "query_string": {
               "query": "sel",
               "fields": [
                  "tags"
               ]
            }
        },
       "sort" : [
          {"prix en €/kg" : {"order" : "asc"}}
       ]
    }

I want to have only one query to fetch the results of all these queries and I only want the cheapest ones, not all the lists.


Solution

  • You could define a filter aggregation for each product in your query

    {
        "aggs" : {
            "sel" : {
                "filter" : {
                    "query_string": {
                       "query": "sel",
                       "fields": [
                          "titre"
                       ]
                    }
                }
            },
            "haricots" : {
                "filter" : {
                    "query_string": {
                       "query": "haricots",
                       "fields": [
                          "titre"
                       ]
                    }
                }
            }
        }
    }
    

    Note that on ES<2, you probably will have to wrap your filter in a query object:

    {
        "aggs" : {
            "sel" : {
                "filter" : {
                    "query": {
                        "query_string": {
                           "query": "sel",
                           "fields": [
                              "titre"
                           ]
                        }
                    }
                }
            },
            "haricots" : {
                "filter" : {
                    "query": {
                        "query_string": {
                           "query": "haricots",
                           "fields": [
                              "titre"
                           ]
                        }
                    }
                }
            }
        }
    }
    

    Each aggregation build a bucket corresponding your filters. You would then add a top hits sub aggregation to each of these buckets to get the lowest price. For example, with the sel bucket :

    {
        "aggs" : {
            "sel" : {
                "filter" : {
                    "query_string": {
                       "query": "sel",
                       "fields": [
                          "titre"
                       ]
                    }
                },
                "aggs" : {
                    "minprice": {
                        "top_hits": {
                            "sort": [
                                {
                                    "prix en €/kg": {
                                        "order": "asc"
                                    }
                                }
                            ],
                            "size" : 1
                        }
                    }
                }
            }
        }
    }
    

    And a fuller example with sel, brocoli and haricots

    {
        "aggs" : {
            "sel" : {
                "filter" : {
                    "query_string": {
                       "query": "sel",
                       "fields": [
                          "titre"
                       ]
                    }
                },
                "aggs" : {
                    "minprice": {
                        "top_hits": {
                            "sort": [
                                {
                                    "prix en €/kg": {
                                        "order": "asc"
                                    }
                                }
                            ],
                            "size" : 1
                        }
                    }
                }
            },
            "haricots" : {
                "filter" : {
                    "query_string": {
                       "query": "haricots",
                       "fields": [
                          "titre"
                       ]
                    }
                },
                "aggs" : {
                    "minprice": {
                        "top_hits": {
                            "sort": [
                                {
                                    "prix en €/kg": {
                                        "order": "asc"
                                    }
                                }
                            ],
                            "size" : 1
                        }
                    }
                }
            },
            "brocoli" : {
                "filter" : {
                    "query_string": {
                       "query": "brocoli",
                       "fields": [
                          "titre"
                       ]
                    }
                },
                "aggs" : {
                    "minprice": {
                        "top_hits": {
                            "sort": [
                                {
                                    "prix en €/kg": {
                                        "order": "asc"
                                    }
                                }
                            ],
                            "size" : 1
                        }
                    }
                }
            }
        }
    }
    

    Your results would look like

    {
        "aggregations" : {
            "sel" : {
                "doc_count" : 2,
                "minprice" : {
                    "hits" : {
                        "total" : 2,
                        "max_score" : null,
                        "hits" : [
                            {
                                "_index" : "test",
                                "_type" : "product",
                                "_id" : "1",
                                "_score" : null,
                                "_source" : {
                                    "id" : 1,
                                    "titre" : "sel 1",
                                    "prix en €/kg" : 1
                                },
                                "sort" : [
                                    1.0
                                ]
                            }
                        ]
                    }
                }
            },
            "haricots" : {
                "doc_count" : 1,
                "minprice" : {
                    "hits" : {
                        "total" : 1,
                        "max_score" : null,
                        "hits" : [
                            {
                                "_index" : "test",
                                "_type" : "product",
                                "_id" : "3",
                                "_score" : null,
                                "_source" : {
                                    "id" : 3,
                                    "titre" : "haricots",
                                    "prix en €/kg" : 3
                                },
                                "sort" : [
                                    3.0
                                ]
                            }
                        ]
                    }
                }
            }
        }
    }