elasticsearch, highlight, cpu-word, synonym, multiple-matches

Elasticsearch not highlighting all matches


I'm having a hard time understanding why the following query object doesn't make Elasticsearch highlight all matching words within a _source field.

{
    _source: [
        'baseline',
        'cdrp',
        'date',
        'description',
        'dev_status',
        'element',
        'event',
        'id'
    ],
    track_total_hits: true,
    query: {
        bool: {
            filter: [],
            should: [
                {
                    multi_match:{
                        query: "imposed calcs",
                        fields: ["cdrp","description","narrative.*","title","cop"]
                    }
                }
            ]
        } 
    },
    highlight: { fields: { '*': {} } },
    sort: [],
    from: 0,
    size: 50
}

Running this query returns the following highlight object. Notice that only the word "calcs" is highlighted. How do I build the highlight object so that Elasticsearch highlights "Imposed" as well?

"highlight": {
    "description": [
        "GAP Sub-window conn ONe-e: heve PP-BE Defined ASST requirem RV confsng, des MAN Imposed <em>calcs</em> mising"
    ]
} 

I am using the following "description" mapping, followed by the index analysis settings that define its analyzer:

"description": {
    "type": "text",
    "analyzer": "search_synonyms"
},



"analysis": {
    "analyzer": {
        "search_synonyms": {
            "tokenizer": "whitespace",
            "filter": [
                "graph_synonyms"
            ],
            "normalizer": [
                "normalizer_1"
            ]
        }
    },
    "filter": {
        "graph_synonyms": {
            "type": "synonym_graph",
            "synonyms_path": "synonym.txt"
        }
    },
    "normalizer": {
        "normalizer_1": {
            "type": "custom",
            "char_filter": [],
            "filter": ["lowercase", "asciifolding"]
        }
    }
}

Solution

  • EDIT

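    I think the filters from your normalizer are never applied, so only graph_synonyms is in effect: as far as I can tell, a custom analyzer only honours "tokenizer", "char_filter" and "filter", and normalizers are meant for keyword fields rather than analyzers. Without the lowercase filter, "Imposed" is indexed with a capital "I" and the query term "imposed" never matches it, while "calcs" matches because it is already lowercase. Try listing the normalizer's filters directly in the analyzer's "filter" array: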

    PUT highlighter
    {
      "settings": {
        "analysis": {
          "analyzer": {
            "search_synonyms": {
              "tokenizer": "whitespace",
              "filter": [
                "graph_synonyms",
                "lowercase",
                "asciifolding"
              ]
            }
          },
          "filter": {
            "graph_synonyms": {
              "type": "synonym_graph",
              "synonyms_path": "synonym.txt"
            }
          }
        }
      },
      "mappings": {
        "properties": {
          "description": {
            "type": "text",
            "analyzer": "search_synonyms"
          }
        }
      }
    }
    

    ORIGINAL

    I suspect there's some sort of setting in your mapping that prevents the match, since I could not replicate this with a semi-default mapping:

    PUT highlighter
    {
      "settings": {
        "analysis": {
          "analyzer": {
            "my_analyzer": {
              "tokenizer": "standard",
              "filter": [
                "lowercase"
              ]
            }
          }
        }
      },
      "mappings": {
        "properties": {
          "description": {
            "type": "text",
            "fields": {
              "lowercase": {
                "type": "text",
                "analyzer": "my_analyzer"
              }
            }
          }
        }
      }
    }
    
    POST highlighter/_doc
    {
      "description": "GAP Sub-window conn ONe-e: heve PP-BE Defined ASST requirem RV confsng, des MAN Imposed calcs mising"
    }
    

    Plugging in your query (with "description.lowercase" substituted for "description" in the fields list)

    GET highlighter/_search
    {
      "_source": [
        "baseline",
        "cdrp",
        "date",
        "description",
        "dev_status",
        "element",
        "event",
        "id"
      ],
      "track_total_hits": true,
      "query": {
        "bool": {
          "filter": [],
          "should": [
            {
              "multi_match": {
                "query": "imposed calcs",
                "fields": [
                  "cdrp",
                  "description.lowercase",
                  "narrative.*",
                  "title",
                  "cop"
                ]
              }
            }
          ]
        }
      },
      "highlight": {
        "fields": {
          "*": {}
        }
      },
      "sort": [],
      "from": 0,
      "size": 50
    }
    

    yielding

    [
      {
        "_index":"highlighter",
        "_type":"_doc",
        "_id":"Bf5F5HEBW-D5QnrWwTyh",
        "_score":0.5753642,
        "_source":{
          "description":"GAP Sub-window conn ONe-e: heve PP-BE Defined ASST requirem RV confsng, des MAN Imposed calcs mising"
        },
        "highlight":{
          "description":[
            "GAP Sub-window conn ONe-e: heve PP-BE Defined ASST requirem RV confsng, des MAN <em>Imposed</em> <em>calcs</em> mising"
          ]
        }
      }
    ]