python, python-3.x, google-colaboratory, keyword-search

Multiple words in single keyword and counting them in the data in python


I'm trying to run the following Python code to count keywords in specific values of my dictionary. With keywords = ['is', 'my'] it works fine, but with keywords = ['is', 'my name'] it doesn't count the keyword my name. I don't know what mistake I'm making. Could anyone look at the code and help me out? Thank you.

from collections import Counter
import json 
from typing import List, Dict


keywords = ['is', 'my name']


def get_keyword_counts(text: str, keywords: List[str]) -> Dict[str, int]:
    """Count how often each keyword occurs as a whitespace-separated token.

    ``text`` is split on whitespace and every resulting token is compared
    against ``keywords``. A keyword that itself contains whitespace (for
    example ``'my name'``) can therefore never match and is simply absent
    from the result, as is any keyword that does not occur in ``text``.

    Args:
        text: The text to scan.
        keywords: The tokens to look for.

    Returns:
        A mapping from each keyword found in ``text`` to its number of
        occurrences.
    """
    # Build the lookup set once instead of once per distinct token.
    wanted = set(keywords)
    return {
        word: count for word, count in Counter(text.split()).items()
        if word in wanted
    }


# The driver code must live at module level: indented under the function it
# would sit after the ``return`` statement and never execute.
data = {
    "policy": {
        "1": {
            "ID": "ML_0",
            "URL": "www.a.com",
            "Text": "my name is Martin and here is my code"
        },
        "2": {
            "ID": "ML_1",
            "URL": "www.b.com",
            "Text": "my name is Mikal and here is my code"
        }
    }
}

# Merge the per-keyword counts into each policy entry, then show the result.
for policy in data['policy'].values():
    policy.update(get_keyword_counts(policy['Text'], keywords))
print(json.dumps(data, indent=4))


Solution

  • The text is split on whitespace in get_keyword_counts, so the substring "my name" is split as well: there is never a single token "my name", only the separate tokens "my" and "name". I guess you want to count it as a whole, so here is what you need:

    def get_keyword_counts(text: str, keywords: List[str]) -> Dict[str, int]:
        """Count whole-word occurrences of each keyword or phrase in ``text``.

        Unlike a plain ``text.count(keyword)``, a keyword only matches when it
        is not embedded in a longer word, so ``'is'`` is not counted inside
        ``'this'``. The words of a multi-word keyword may be separated by any
        run of whitespace in ``text``.

        Args:
            text: The text to scan.
            keywords: Single words or multi-word phrases to count.

        Returns:
            A mapping from every keyword to its number of non-overlapping
            occurrences; keywords that do not occur map to ``0``, as do
            empty or whitespace-only keywords.
        """
        # Imported locally so the snippet needs no extra top-level import.
        import re

        def count_phrase(phrase: str) -> int:
            words = phrase.split()
            if not words:
                # An empty pattern would match at every position in the text.
                return 0
            # Lookarounds instead of ``\b`` so keywords that start or end with
            # a non-word character (e.g. "c++") are still delimited correctly.
            pattern = (
                r"(?<!\w)" + r"\s+".join(map(re.escape, words)) + r"(?!\w)"
            )
            return len(re.findall(pattern, text))

        return {keyword: count_phrase(keyword) for keyword in keywords}