mongodb indexing pymongo text-search

MongoDB indexed text search only works for exact match


I have a field 'user_name' populated with data. This code gives me no results:

history = db.history
history.create_index([('user_name', 'text')])
history.find({'$text' : {'$search' : 'a'}})

But when I specify the exact name, it works

history.find({'$text' : {'$search' : 'exact name'}})

Here is the output of explain() for 'a' search:

{
    "executionSuccess": true,
    "nReturned": 0,
    "executionTimeMillis": 0,
    "totalKeysExamined": 0,
    "totalDocsExamined": 0,
    "executionStages": {
        "stage": "TEXT",
        "nReturned": 0,
        "executionTimeMillisEstimate": 0,
        "works": 1,
        "advanced": 0,
        "needTime": 0,
        "needYield": 0,
        "saveState": 0,
        "restoreState": 0,
        "isEOF": 1,
        "indexPrefix": {},
        "indexName": "user_name_text",
        "parsedTextQuery": { "terms": [], "negatedTerms": [], "phrases": [], "negatedPhrases": [] },
        "textIndexVersion": 3,
        "inputStage": {
            "stage": "TEXT_MATCH",
            "nReturned": 0,
            "executionTimeMillisEstimate": 0,
            "works": 0,
            "advanced": 0,
            "needTime": 0,
            "needYield": 0,
            "saveState": 0,
            "restoreState": 0,
            "isEOF": 1,
            "docsRejected": 0,
            "inputStage": {
                "stage": "FETCH",
                "nReturned": 0,
                "executionTimeMillisEstimate": 0,
                "works": 0,
                "advanced": 0,
                "needTime": 0,
                "needYield": 0,
                "saveState": 0,
                "restoreState": 0,
                "isEOF": 1,
                "docsExamined": 0,
                "alreadyHasObj": 0,
                "inputStage": { "stage": "OR", "nReturned": 0, "executionTimeMillisEstimate": 0, "works": 0, "advanced": 0, "needTime": 0, "needYield": 0, "saveState": 0, "restoreState": 0, "isEOF": 1, "dupsTested": 0, "dupsDropped": 0 }
            }
        }
    },
    "allPlansExecution": []
}

Here is the output of explain() for exact match of username ('akkcess'):

{
    "executionSuccess": true,
    "nReturned": 39,
    "executionTimeMillis": 1,
    "totalKeysExamined": 39,
    "totalDocsExamined": 39,
    "executionStages": {
        "stage": "TEXT",
        "nReturned": 39,
        "executionTimeMillisEstimate": 0,
        "works": 40,
        "advanced": 39,
        "needTime": 0,
        "needYield": 0,
        "saveState": 0,
        "restoreState": 0,
        "isEOF": 1,
        "indexPrefix": {},
        "indexName": "user_name_text",
        "parsedTextQuery": { "terms": ["akkcess"], "negatedTerms": [], "phrases": [], "negatedPhrases": [] },
        "textIndexVersion": 3,
        "inputStage": {
            "stage": "TEXT_MATCH",
            "nReturned": 39,
            "executionTimeMillisEstimate": 0,
            "works": 40,
            "advanced": 39,
            "needTime": 0,
            "needYield": 0,
            "saveState": 0,
            "restoreState": 0,
            "isEOF": 1,
            "docsRejected": 0,
            "inputStage": {
                "stage": "FETCH",
                "nReturned": 39,
                "executionTimeMillisEstimate": 0,
                "works": 40,
                "advanced": 39,
                "needTime": 0,
                "needYield": 0,
                "saveState": 0,
                "restoreState": 0,
                "isEOF": 1,
                "docsExamined": 39,
                "alreadyHasObj": 0,
                "inputStage": {
                    "stage": "OR",
                    "nReturned": 39,
                    "executionTimeMillisEstimate": 0,
                    "works": 40,
                    "advanced": 39,
                    "needTime": 0,
                    "needYield": 0,
                    "saveState": 0,
                    "restoreState": 0,
                    "isEOF": 1,
                    "dupsTested": 39,
                    "dupsDropped": 0,
                    "inputStage": {
                        "stage": "IXSCAN",
                        "nReturned": 39,
                        "executionTimeMillisEstimate": 0,
                        "works": 40,
                        "advanced": 39,
                        "needTime": 0,
                        "needYield": 0,
                        "saveState": 0,
                        "restoreState": 0,
                        "isEOF": 1,
                        "keyPattern": { "_fts": "text", "_ftsx": 1 },
                        "indexName": "user_name_text",
                        "isMultiKey": false,
                        "isUnique": false,
                        "isSparse": false,
                        "isPartial": false,
                        "indexVersion": 2,
                        "direction": "backward",
                        "indexBounds": {},
                        "keysExamined": 39,
                        "seeks": 1,
                        "dupsTested": 0,
                        "dupsDropped": 0
                    }
                }
            }
        }
    },
    "allPlansExecution": []
}

Do you have any idea why it behaves this way? According to the docs and tutorials, this should work.


Solution

  • "a" is a stop word in English, the default language of a text index. Almost every natural-language text contains it, so searching for it would return nearly every document. Since that isn't useful, text search drops stop words like "a" from the query; this is why "terms" in parsedTextQuery is empty in your first explain() output.

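    Separately, MongoDB text search does support exact phrase matching, but it requires the phrase to be wrapped in double quotes inside the search string (e.g. {'$search': '"exact name"'}). Your posted query does not do this, so it uses regular stemmed word matching, not exact matching. Either way, $text matches whole (stemmed) words, not prefixes or substrings, so a search for 'a' would not match 'akkcess' even if "a" were not a stop word.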