elasticsearchdsl

Elasticsearch: search elements outside a range


I have data stored in elastic with a year_start and year_end integer values.

Then, I use a "year" to query elements.

My goal is to search elements that are within a "year".

When I need to search elements in a "year" that is within that range, I can do it ok using:

{
        "query": {
            "bool": {
                "filter": [
                    {
                        "range": {
                            "year_start": {
                                "lte": year
                            }
                        }
                    },
                    {
                        "range": {
                            "year_end": {
                                "gte": year
                            }
                        }
                    }
                ]
         }
}

However, I can't get elements that are NOT in that range. I tried this:

{
        "query": {
            "must_not": {
                "filter": [
                    {
                        "range": {
                            "year_start": {
                                "lte": year
                            }
                        }
                    },
                    {
                        "range": {
                            "year_end": {
                                "gte": year
                            }
                        }
                    }
                ]
         }
}

My quessing is that it filter each of both conditions separately, and it's not using both at the same time. Something like, "first I exclude everything up to year, and then later I exclude everything from year", instead of the expected "exclude every element that matches both conditions at the same time".

[EDIT] Based on the answer received, here's an example of what I'm trying, but still not working. It should get all elements for year 2022 and not with year 2023:

DELETE test

PUT test/_bulk?refresh
{"index": {}}
{"year_start": "2022", "year_end": "2022" }
{"index": {}}
{"year_start": "2023", "year_end": "2024" }
{"index": {}}
{"year_start": "2021", "year_end": "2025" }
{"index": {}}
{"year_start": "2016", "year_end": "2020" }

# All ranges that include 2022 and not 2023
GET test/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "range": {
            "year_start": {
              "lte": 2022
            }
          }
        },
        {
          "range": {
            "year_end": {
              "gte": 2022
            }
          }
        }
      ],
      "should": [
        {
          "range": {
            "year_start": {
              "gt": 2023
            }
          }
        },
        {
          "range": {
            "year_end": {
              "lt": 2023
            }
          }
        }
      ]
    }
  }
}

Solution

  • There is a couple possible solutions here. If you store the range boundaries as separate fields you can use range queries:

    DELETE test
    
    PUT test
    {
      "mappings": {
        "properties": {
          "year_start": {
            "type": "integer"
          },
          "year_end": {
            "type": "integer"
          }
        }
      }
    }
    
    PUT test/_bulk?refresh
    {"index": {}}
    {"year_start": 2022, "year_end": 2022 }
    {"index": {}}
    {"year_start": 2023, "year_end": 2024 }
    {"index": {}}
    {"year_start": 2021, "year_end": 2025 }
    {"index": {}}
    {"year_start": 2016, "year_end": 2020 }
    
    # All ranges that include 2022
    GET test/_search
    {
      "query": {
        "bool": {
          "must": [
            {
              "range": {
                "year_start": {
                  "lte": 2022
                }
              }
            },
            {
              "range": {
                "year_end": {
                  "gte": 2022
                }
              }
            }
          ]
        }
      }
    }
    
    # All ranges that don't include 2022
    GET test/_search
    {
      "query": {
        "bool": {
          "should": [
            {
              "range": {
                "year_start": {
                  "gt": 2022
                }
              }
            },
            {
              "range": {
                "year_end": {
                  "lt": 2022
                }
              }
            }
          ]
        }
      }
    }
    
    # All ranges that don't include 2023
    GET test/_search
    {
      "query": {
        "bool": {
          "should": [
            {
              "range": {
                "year_start": {
                  "gt": 2023
                }
              }
            },
            {
              "range": {
                "year_end": {
                  "lt": 2023
                }
              }
            }
          ]
        }
      }
    }
    
    # All ranges that include 2022 but don't include 2023
    GET test/_search
    {
      "query": {
        "bool": {
          "must": [
            {
              "bool": {
                "must": [
                  {
                    "range": {
                      "year_start": {
                        "lte": 2022
                      }
                    }
                  },
                  {
                    "range": {
                      "year_end": {
                        "gte": 2022
                      }
                    }
                  }
                ]
              }
            },
            {
              "bool": {
                "should": [
                  {
                    "range": {
                      "year_start": {
                        "gt": 2023
                      }
                    }
                  },
                  {
                    "range": {
                      "year_end": {
                        "lt": 2023
                      }
                    }
                  }
                ]
              }
            }
          ]
        }
      }
    }
    
    

    The last query can be simplified by moving the must clause from the first inner query into the outer query, but I left it as is for clarity.

    An alternative solution for storing and searching ranges is using range type:

    DELETE test
    PUT test
    {
      "mappings": {
        "properties": {
          "year_range": {
            "type": "integer_range"
          }
        }
      }
    }
    
    PUT test/_bulk?refresh
    {"index": {}}
    {"year_range":{ "gte":  2022, "lte": 2022 }}
    {"index": {}}
    {"year_range":{ "gte":  2023, "lte": 2024 }}
    {"index": {}}
    {"year_range":{ "gte":  2021, "lte": 2025 }}
    {"index": {}}
    {"year_range":{ "gte":  2016, "lte": 2020 }}
    
    
    # All ranges that include 2022
    GET test/_search
    {
      "query": {
        "term" : {
          "year_range" : {
            "value": 2022
          }
        }
      }
    }
    
    # All ranges that don't include 2022
    GET test/_search
    {
      "query": {
        "bool": {
          "must_not": [
            {
              "term": {
                "year_range": {
                  "value": 2022
                }
              }
            }
          ]
        }
      }
    }