Can any one please help me with the reverse part of the regex? I got it almost right but the reverse is tricky because if I have an input as:
Input = dogs and cats or (white or black) or (cat and (red or blue))
Current Regex Output = dogs.{0,10}cats|(white|black)|(cat.{0,10}(red|blue)) "OK regex"
Current Regex Reverse Output = ))blue|red(.{0,10}cat(|)black|white(|cats.{0,10}dogs "It's totally wrong"
It should be: (blue|red).{0,10}cat|(black|white)|cats.{0,10}dogs
For some reason the parenthesis is messing up the whole reverse function. Thank you in advance.
import re
import os
def normalize_special_terms(text):
text = re.sub(r'\bli[\s-]?6\b', r'\\bli[-\\s]?6\\b', text, flags=re.IGNORECASE)
return text
def reverse_regex_order(regex):
# Reverse functionality for the regex output
def reverse_inside_parentheses(s):
# Reverse the order of terms inside parentheses not working properly
parts = re.split(r'(\.\{0,10\}|\||\(|\))', s)
stack = []
buffer = []
for part in parts:
if part == ')':
if buffer:
stack.append(''.join(buffer[::-1]))
buffer = []
stack.append(part)
elif part == '(':
stack.append(part)
if buffer:
stack.append(''.join(buffer[::-1]))
buffer = []
else:
buffer.append(part)
if buffer:
stack.append(''.join(buffer[::-1]))
return ''.join(stack)
terms = re.split(r'(\.\{0,100\}|\||\(|\))', regex)
reversed_terms = [reverse_inside_parentheses(term) if '(' in term or ')' in term else term for term in terms]
reversed_terms.reverse()
return ''.join(reversed_terms)
def text_to_regex(input_file, max_gap=100):
# Convert text from input file to regex
if not os.path.exists(input_file):
raise FileNotFoundError(f"Input '{input_file}' does not exist, check location.")
output_file = os.path.join(os.path.dirname(input_file), 'regex.txt')
output_reverse_file = os.path.join(os.path.dirname(input_file), 'regex_reverse.txt')
with open(input_file, 'r') as f:
lines = f.readlines()
regex_parts = []
for line in lines:
line = line.strip().lower()
line = normalize_special_terms(line)
terms = re.split(r'\s+(?:and|or)\s+', line)
operators = re.findall(r'\s+(and|or)\s+', line)
line_regex_parts = [terms[0]]
for i in range(1, len(terms)):
gap = f'.{{0,{max_gap}}}' if operators[i - 1] == 'and' else '|'
line_regex_parts.append(gap + terms[i])
regex_parts.append(''.join(line_regex_parts))
# Generate reversed regex
reversed_regex_parts = [reverse_regex_order(regex) for regex in regex_parts]
# Write regex file
with open(output_file, 'w') as f:
for regex in regex_parts:
f.write(regex + '\n')
# Write reversed regex file
with open(output_reverse_file, 'w') as f:
for regex in reversed_regex_parts:
f.write(regex + '\n')
return regex_parts, reversed_regex_parts
if __name__ == "__main__":
input_file = '/input.txt'
try:
original_regex, reversed_regex = text_to_regex(input_file)
print("Regex Output:")
print("\n".join(original_regex))
print("\nReversed Regex Output:")
print("\n".join(reversed_regex))
except Exception as e:
print(f"Error: {e}")
####
This code can just change the pattern to ((blue|red).{0,10}cat)|(black|white)|cats.{0,10}dogs
.
import re
pattern = r'dogs.{0,10}cats|(white|black)|(cat.{0,10}(red|blue))'
reg = re.compile(r'\w+|(\.\{\d+,\d+\})|[\(\)\|]')
symbols = {
')': '(',
'(': ')'
}
results = []
match = reg.search(pattern)
end = 0
while match:
_m = match[0] if match[0] not in symbols else symbols[match[0]]
results.append(_m)
end = len(_m)
pattern = pattern[end:]
match = reg.search(pattern)
print(''.join(results[::-1])) # ((blue|red).{0,10}cat)|(black|white)|cats.{0,10}dogs