I am trying to write a simple parser method in Python. It takes in a filename pointing to a file of a certain format. An example of this format is below:
File:
.type = INFILE
.fmt = CFP_INPUTFILE_FMT_2
Data:
.cases
.given = True
.numCases = 2
.case
.numlines = 2
.line
.value = 3
.value = 3
.line
.value = 3
.value = 3
.case
.line
.value = 3
.value = 3
.line
.value = 3
.value = 3
The parser reads this file and outputs the raw data to a terminal via subprocess.run()
and prints the output. The data printed for the file should look like this:
2
33
33
33
33
The method itself looks something like this:
# Reads `inputfile`, writes the extracted numbers to temp.txt, then prints the
# captured output of `cat temp.txt`.
# NOTE(review): the indentation of this listing was lost, so the nesting the
# notes below refer to is inferred from the with/try/except pairing — confirm
# against the real source.
@classmethod
def input_file_fmt_1_to_input(cls, inputfile: str):
# temp.txt is opened for writing here. The try that follows is closed by the
# `except` at the very end, so everything up to and including the print call
# appears to run while this file is still open.
with open('temp.txt', 'w') as temp:
try:
with open(inputfile, 'r') as file:
# Counters for the current .case / .line section, plus one accumulator string
# per hard-coded (case, line) pair.
case = 0
line = 0
case1line1 = ''
case1line2 = ''
case2line1 = ''
case2line2 = ''
for lne in file.readlines():
cleanline = lne.lstrip().rstrip()
# '.numCases = N' splits on single spaces into ['.numCases', '=', 'N'],
# so index 2 is the number.
if cleanline.startswith('.numCases'):
split = cleanline.split(' ')
value = str(split[2]) + '\n'
temp.write(value)
# NOTE(review): startswith('.case') is also true for the '.cases' header line
# of the sample file, which would bump the counter before the first real
# case — confirm.
elif cleanline.startswith('.case'):
case += 1
# NOTE(review): `line` is only ever incremented; nothing in this listing resets
# it when a new case starts, so the `case == 2 and line == 1/2` branches below
# look unreachable for the sample file.
elif cleanline.startswith('.line'):
line += 1
elif cleanline.startswith('.value') and case == 1 and line == 1:
case1line1 = case1line1 + str(cleanline.split(' ')[2]) + ' '
elif cleanline.startswith('.value') and case == 1 and line == 2:
case1line2 = case1line2 + str(cleanline.split(' ')[2]) + ' '
elif cleanline.startswith('.value') and case == 2 and line == 1:
case2line1 = case2line1 + str(cleanline.split(' ')[2]) + ' '
elif cleanline.startswith('.value') and case == 2 and line == 2:
case2line2 = case2line2 + str(cleanline.split(' ')[2]) + ' '
else:
pass
# NOTE(review): this is an if/elif chain, so at most one branch writes.
# case1line3 and case1line4 are not assigned anywhere in this listing.
if case1line1 != '':
temp.write(case1line1)
temp.write('\n')
elif case1line2 != '':
temp.write(case1line2)
temp.write('\n')
elif case1line3 != '':
temp.write(case2line1)
temp.write('\n')
elif case1line4 != '':
temp.write(case2line2)
temp.write('\n')
# NOTE(review): `cat` appears to run before the with-block on temp.txt has
# exited, so the writes above may still be sitting in Python's write buffer
# when the file is read — confirm.
result = run('cat temp.txt', shell=True, capture_output=True)
# run() is called without text=True, so result.stdout is bytes and print()
# shows its b'...' repr.
print(result.stdout)
# A missing input file is re-raised as CfpRuntimeError, chained to the
# original FileNotFoundError.
except FileNotFoundError as e:
raise CfpRuntimeError from e
I've written a pytest test for this method, which looks like this:
def test_input_file_fmt_1_to_input_test(capsys):
    """Run the parser on the sample file and compare everything it printed."""
    sample_path = '../../TEST/modules_test/testfile.cfpin'
    cfp_testcontext.InputParser.input_file_fmt_1_to_input(sample_path)
    printed = capsys.readouterr().out
    assert printed == '2\n3 3 \n3 3 \n3 3 \n3 3 '
When I run the test, I get an assertion error:
assert "b''\nb''\nb''\nb''\n" == "2\n3 3 \n3 3 \n3 3 \n3 3 "
I have tried writing a fake method and a fake test using capsys
to see whether the output was empty and whether it was a byte string. The output was a byte string, so I believe that the conversion is happening within pytest. However, that fake function, which wrote 'hello' to the console, worked perfectly, so I am pretty sure there is something wrong with my logic.
The issue is that:
result = subprocess.run(['cat', 'temp.txt'], capture_output=True)
returns a CompletedProcess[bytes], so result.stdout is a byte string, not a regular string.
You can fix this by passing text=True:
result = subprocess.run(['cat', 'temp.txt'], capture_output=True, text=True)
I’d recommend avoiding subprocess altogether here. Running an external process just to read a file back is unnecessary — I would say a bit cringe. A better approach is to separate concerns:
- _parse_lines(inputfile: str) -> list[str] – parses the file and returns the output as a list of strings.
- input_file_fmt_1_to_input(...) – handles writing to temp.txt and printing, without using cat.
from subprocess import run
import subprocess
from typing import Iterator
from subprocess import run


class CfpRuntimeError(Exception):
    """Raised when the parser cannot complete, e.g. the input file is missing.

    The class was previously defined twice in a row with identical bodies;
    a single definition is kept so there is only one exception type to catch.
    """
class InputParser:
    """Convert a CFP input-description file into the raw input it describes."""

    @staticmethod
    def _parse_lines(inputfile: str) -> list[str]:
        """Return the raw input lines described by *inputfile*.

        The first returned entry is the ``.numCases`` value. Each ``.line``
        section then contributes one entry holding its ``.value`` items joined
        by single spaces. ``.value`` entries that appear before any ``.line``
        of the current ``.case`` are ignored.

        Args:
            inputfile: Path of the description file to read.

        Returns:
            The output lines, without trailing newlines.

        Raises:
            CfpRuntimeError: If *inputfile* does not exist.
            ValueError: If the ``.numCases`` value is not an integer.
        """
        # Only open() can raise FileNotFoundError, so keep the try that narrow.
        try:
            file = open(inputfile, 'r')
        except FileNotFoundError as e:
            raise CfpRuntimeError from e

        output_lines: list[str] = []
        current_values: list[str] = []
        inside_line = False

        with file:
            for raw in file:
                # Split on the first '=' and strip both halves so that
                # '.value = 3', '.value=3' and '.value  =  3' all parse alike.
                key, has_value, value = raw.strip().partition('=')
                key = key.strip()

                if key == '.numCases' and has_value:
                    output_lines.append(str(int(value)))
                elif key in ('.line', '.case'):
                    # A new section starts: emit the values collected so far.
                    # Keys are compared exactly, so the '.cases' header does
                    # not count as a '.case'.
                    if current_values:
                        output_lines.append(' '.join(current_values))
                        current_values = []
                    inside_line = key == '.line'
                elif key == '.value' and has_value and inside_line:
                    current_values.append(value.strip())

        # The last '.line' section has no following marker to flush it.
        if current_values:
            output_lines.append(' '.join(current_values))
        return output_lines

    @classmethod
    def input_file_fmt_1_to_input(cls, inputfile: str) -> None:
        """Parse *inputfile*, write the result to ``temp.txt`` and print it.

        Each output line is terminated by a newline, both in ``temp.txt``
        (created in the current working directory) and on stdout.

        Args:
            inputfile: Path of the description file to read.

        Raises:
            CfpRuntimeError: If *inputfile* does not exist.
        """
        output_lines = cls._parse_lines(inputfile)
        text = ''.join(line + '\n' for line in output_lines)
        with open('temp.txt', 'w') as temp:
            temp.write(text)
        # Print the text itself: the previous code read `.stdout` from a plain
        # string (AttributeError) and had joined the lines without newlines.
        print(text, end='')
# Demo call: runs the parser on 'example.txt' (resolved against the current
# working directory) as soon as this module is executed or imported.
InputParser.input_file_fmt_1_to_input('example.txt')
import pytest
def test_input_file_fmt_1_to_input_test(capsys: pytest.CaptureFixture):
    """Check that parsing example.txt prints one newline-terminated row per line."""
    InputParser.input_file_fmt_1_to_input('example.txt')
    stdout_text = capsys.readouterr().out
    assert stdout_text == '2\n3 3\n3 3\n3 3\n3 3\n'