python python-3.x regex

How to search for, match, and correct an exact word in a text file


The test.txt file contains 2 misspelled words (loyai, royai) alongside a correctly spelled one (pain):

loyai

royai

pain

I want to search for the misspelled words and replace them using the dict words_. My problem is that if a misspelled word (pai) is part of another word (pain), the code corrects pain to paln when it should leave it unchanged.

How can I have my code ignore words that are not a full match to the misspelled word?

#!/usr/bin/env python3   
import re

# Maps each known misspelling to its correction.
words_ = {"loyai": "loyal", "royai": "royal", "pai": "pal"}


class srt_clean(object):
    """Correct known misspellings in a text file, rewriting it in place.

    The misspelling -> correction pairs are read from the module-level
    ``words_`` dict.
    """

    def __init__(self, file_name):
        """Remember *file_name* as the default file to clean."""
        self.file_name = file_name

    def replace_words_(self, file_name=None):
        """Replace every whole-word misspelling in the file and save it.

        Only complete words are corrected: ``pai`` becomes ``pal``, but the
        ``pai`` inside ``pain`` is left untouched.

        Args:
            file_name: Path of the file to clean. Defaults to the path
                given to the constructor.
        """
        if file_name is None:
            file_name = self.file_name

        with open(file_name, 'r') as file:
            data = file.read()

        for search_text, replace_text in words_.items():
            # \b anchors the match at word boundaries so a misspelling that
            # is merely a substring of a longer word is ignored; re.escape
            # makes the dictionary key match literally.
            pattern = rf'\b{re.escape(search_text)}\b'
            # A callable replacement inserts the correction verbatim
            # (no backslash/group-reference processing).
            data = re.sub(pattern, lambda _match, text=replace_text: text, data)

        # Reopen the same file in write-only mode and store the corrected text.
        with open(file_name, 'w') as file:
            file.write(data)
        

# Script driver: run the word replacement on "test.srt", which is read and
# then overwritten with the corrected text.
file_name = "test.srt"
clean = srt_clean(file_name)
# replace_words_ takes the path as an explicit argument, so it is passed again.
clean.replace_words_(file_name)

Solution

  • Put the word-boundary anchor (\b) on both sides of the search pattern so that it matches only the entire word.

                # \b is the regex word-boundary anchor: wrapping the misspelling in
                # it makes re.sub match whole words only, so "pai" no longer
                # matches inside "pain".
                for search_text, replace_text in words_.items():
                    data = re.sub(rf'\b{search_text}\b', replace_text, data)