elasticsearch, elasticsearch-query, elasticsearch-analyzers

Elasticsearch analyzer at search time not working


  1. Create the Index with Query-Time Analyzer Only:
PUT /local_persons
{
  "settings": {
    "analysis": {
      "analyzer": {
        "person_search_analyzer": {
          "type": "custom",
          "char_filter": ["remove_special_chars"],
          "filter": ["lowercase"],
          "tokenizer": "whitespace"
        }
      },
      "char_filter": {
        "remove_special_chars": {
          "type": "pattern_replace",
          "pattern": "[^a-zA-Z0-9]",
          "replacement": ""
        }
      }
    }
  }
}
  2. Index the data with special characters:
PUT /local_persons/_doc/1
{
  "id": 1,
  "firstName": "Re'mo",
  "lastName": "D'souza",
  "email": "remo.d@test.com",
  "dateOfBirth": "1973-01-01",
  "isActive": 1
}

Now searching for the person at the query time:

Approach 1: Using the query_string query (with the analyzer specified at query time)

GET /local_persons/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "query_string": {
            "query": "remo",
            "fields": ["firstName"],
            "analyzer": "person_search_analyzer"
          }
        },
        {
          "query_string": {
            "query": "dsouza",
            "fields": ["lastName"],
            "analyzer": "person_search_analyzer"
          }
        }
      ]
    }
  }
}

Approach 2: Using the match query

GET /local_persons/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "firstName": {
              "query": "remo",
              "analyzer": "person_search_analyzer"
            }
          }
        },
        {
          "match": {
            "lastName": {
              "query": "dsouza",
              "analyzer": "person_search_analyzer"
            }
          }
        }
      ]
    }
  }
}

But both of the approaches above return empty results.


Solution

  • Elasticsearch returns no hits because you did not define an index mapping with an analyzer for these fields, so it used the default 'standard' analyzer at index time.

    You can run the _analyze API below to see which tokens the 'standard' analyzer produced when the document was indexed:

    POST local_persons/_analyze
    {
      "text":["Re'mo"],
      "analyzer" : "standard"
    }
    

    Response:

    {
      "tokens": [
        {
          "token": "re'mo",
          "start_offset": 0,
          "end_offset": 5,
          "type": "<ALPHANUM>",
          "position": 0
        }
      ]
    }
    

    Let's see the result of your analyzer:

    POST local_persons/_analyze
    {
      "text":["Re'mo"],
      "analyzer" : "person_search_analyzer"
    }
    

    Response:

    {
      "tokens": [
        {
          "token": "remo",
          "start_offset": 0,
          "end_offset": 5,
          "type": "word",
          "position": 0
        }
      ]
    }
    

    Comparing the two responses, you can clearly see that the analyzers produce different tokens: the index contains "re'mo", while the search looks for "remo". The terms do not match, and hence Elasticsearch returns no hits.

    So, define the index mapping as shown below. This makes sure the same analyzer is used at both index time and search time, so the query returns the expected result.

    PUT /local_persons1
    {
      "settings": {
        "analysis": {
          "analyzer": {
            "person_search_analyzer": {
              "type": "custom",
              "char_filter": ["remove_special_chars"],
              "filter": ["lowercase"],
              "tokenizer": "whitespace"
            }
          },
          "char_filter": {
            "remove_special_chars": {
              "type": "pattern_replace",
              "pattern": "[^a-zA-Z0-9]",
              "replacement": ""
            }
          }
        }
      },
      "mappings": {
        "properties": {
          "firstName":{
            "type": "text",
            "analyzer": "person_search_analyzer"
          },
          "lastName":{
            "type": "text",
            "analyzer": "person_search_analyzer"
          }
        }
      }
    }
    

    Now both of your queries will return the document even if you don't specify the analyzer in the query.

    Query:

    GET /local_persons1/_search
    {
      "query": {
        "bool": {
          "must": [
            {
              "match": {
                "firstName": {
                  "query": "remo"
                }
              }
            },
            {
              "match": {
                "lastName": {
                  "query": "dsouza"
                }
              }
            }
          ]
        }
      }
    }
    

    Response:

    {
      "took": 2,
      "timed_out": false,
      "_shards": {
        "total": 1,
        "successful": 1,
        "skipped": 0,
        "failed": 0
      },
      "hits": {
        "total": {
          "value": 1,
          "relation": "eq"
        },
        "max_score": 0.5753642,
        "hits": [
          {
            "_index": "local_persons1",
            "_id": "1",
            "_score": 0.5753642,
            "_source": {
              "id": 1,
              "firstName": "Re'mo",
              "lastName": "D'souza",
              "email": "remo.d@test.com",
              "dateOfBirth": "1973-01-01",
              "isActive": 1
            }
          }
        ]
      }