pythonregexreverse

Tricky Reverse Regex Python 3.11


Can any one please help me with the reverse part of the regex? I got it almost right but the reverse is tricky because if I have an input as:

Input = dogs and cats or (white or black) or (cat and (red or blue))

Current Regex Output = dogs.{0,10}cats|(white|black)|(cat.{0,10}(red|blue)) "OK regex"

Current Regex Reverse Output = ))blue|red(.{0,10}cat(|)black|white(|cats.{0,10}dogs "It's totally wrong"

It should be: (blue|red).{0,10}cat|(black|white)|cats.{0,10}dogs

For some reason the parenthesis is messing up the whole reverse function. Thank you in advance.

import re
import os

def normalize_special_terms(text):
    text = re.sub(r'\bli[\s-]?6\b', r'\\bli[-\\s]?6\\b', text, flags=re.IGNORECASE)
    return text

def reverse_regex_order(regex):
    
    # Reverse functionality for the regex output
    
    def reverse_inside_parentheses(s):
        # Reverse the order of terms inside parentheses not working properly 
        parts = re.split(r'(\.\{0,10\}|\||\(|\))', s)
        stack = []
        buffer = []
        
        for part in parts:
            if part == ')':
                if buffer:
                    stack.append(''.join(buffer[::-1]))
                    buffer = []
                stack.append(part)
            elif part == '(':
                stack.append(part)
                if buffer:
                    stack.append(''.join(buffer[::-1]))
                    buffer = []
            else:
                buffer.append(part)
        
        if buffer:
            stack.append(''.join(buffer[::-1]))
        
        return ''.join(stack)
    
    terms = re.split(r'(\.\{0,100\}|\||\(|\))', regex)
    reversed_terms = [reverse_inside_parentheses(term) if '(' in term or ')' in term else term for term in terms]
    reversed_terms.reverse()
    
    return ''.join(reversed_terms)

def text_to_regex(input_file, max_gap=100):
    
    # Convert text from input file to regex
    
    if not os.path.exists(input_file):
        raise FileNotFoundError(f"Input '{input_file}' does not exist, check location.")
    
    output_file = os.path.join(os.path.dirname(input_file), 'regex.txt')
    output_reverse_file = os.path.join(os.path.dirname(input_file), 'regex_reverse.txt')
    
    with open(input_file, 'r') as f:
        lines = f.readlines()
    
    regex_parts = []
    
    for line in lines:
        line = line.strip().lower()
        line = normalize_special_terms(line)
        
        terms = re.split(r'\s+(?:and|or)\s+', line)
        operators = re.findall(r'\s+(and|or)\s+', line)
        
        line_regex_parts = [terms[0]]
        
        for i in range(1, len(terms)):
            gap = f'.{{0,{max_gap}}}' if operators[i - 1] == 'and' else '|'
            line_regex_parts.append(gap + terms[i])
        
        regex_parts.append(''.join(line_regex_parts))
    
    # Generate reversed regex  
    reversed_regex_parts = [reverse_regex_order(regex) for regex in regex_parts]
    
    # Write regex file
    with open(output_file, 'w') as f:
        for regex in regex_parts:
            f.write(regex + '\n')
    
    # Write reversed regex file
    with open(output_reverse_file, 'w') as f:
        for regex in reversed_regex_parts:
            f.write(regex + '\n')
    
    return regex_parts, reversed_regex_parts

if __name__ == "__main__":
    input_file = '/input.txt'
    try:
        original_regex, reversed_regex = text_to_regex(input_file)
        print("Regex Output:")
        print("\n".join(original_regex))
        print("\nReversed Regex Output:")
        print("\n".join(reversed_regex))
    except Exception as e:
        print(f"Error: {e}")
####

Solution

  • This code can just change the pattern to ((blue|red).{0,10}cat)|(black|white)|cats.{0,10}dogs.

    import re
    
    
    pattern = r'dogs.{0,10}cats|(white|black)|(cat.{0,10}(red|blue))'
    
    reg = re.compile(r'\w+|(\.\{\d+,\d+\})|[\(\)\|]')
    symbols = {
        ')': '(',
        '(': ')'
    }
    results = []
    match = reg.search(pattern)
    end = 0
    while match:
        _m = match[0] if match[0] not in symbols else symbols[match[0]]
        results.append(_m)
        end = len(_m)
        pattern = pattern[end:]
        match = reg.search(pattern)
    
    print(''.join(results[::-1]))  # ((blue|red).{0,10}cat)|(black|white)|cats.{0,10}dogs