I need to extract the time stamps from a .txt, split() down to just the hour, and print the hour(sorted) and occurrences. so far I have done this several ways and pytest never matches (but it looks the same in my terminal) from a previous exercise (below) I'm making the assumption that I need to accomplish the operations in a single block. so far, I have been using 2 or 3 blocks (in different versions to accomplish the desired output)
pytest will pass this code from a previous exercise
def exercise_8_5():
count = 0
fname = "mbox-short.txt"
if len(fname) < 1:
fname = "mbox-short.txt"
fh = open(fname)
for line in fh:
word = line.split()
if line.startswith('From '):
print(word[1])
count += 1
print("There were", count, "lines in the file with From as the first word")
From Readme.md instruction:
I've tried various other outputs using .join for a single string, using '\n' instead of printing from a loop, using Counter() on a list, various data structures I could think of, always looks the same but doesn't pass
04 3 ('04', 3)
06 1 ('06', 1)
07 1 ('07', 1)
09 2 ('09', 2)
10 3 ('10', 3)
11 6 ('11', 6)
14 1 ('14', 1)
15 2 ('15', 2)
16 4 ('16', 4)
17 2 ('17', 2)
18 1 ('18', 1)
19 1 ('19', 1)
def exercise_10_2():
lst = list()
lst2 = list()
dict = {}
#name = input("Enter file:")
name = 'mbox-short.txt'
if len(name) < 1:
name = "mbox-short.txt"
handle = open(name)
for line in handle:
if line.__contains__('From '):
word = line.split()
time = word[5].split()
hr = word[5].format_map(time)
lst.append(hr)
for time in lst:
time.split()
lst2.append(time[0:2])
lst2.sort()
for sent_time in lst2:
if sent_time in dict:
dict[sent_time] += 1
else:
dict[sent_time] = 1
for item in list(sorted(dict.items())):
print(item)
output: (output gets printed one index per line, but Ive tried printing the list on one line with no luck) ('04', 3) ('06', 1) ('07', 1) ('09', 2) ('10', 3) ('11', 6) ('14', 1) ('15', 2) ('16', 4) ('17', 2) ('18', 1) ('19', 1)
assert captured_output.out.strip() in ['04 3\n06 1\n07 1\n09 2\n10 3\n11 6\n14 1\n15 2\n16 4\n17 2\n18 1\n19 1'], "Incorrect output. Expected output found in README.md"
========================= FAILURES ===============================
___________________ test_exercise_10_2_output ____________________
monkeypatch = <_pytest.monkeypatch.MonkeyPatch object at 0x10205f470>,
capsys = <_pytest.capture.CaptureFixture object at 0x102075400>
def test_exercise_10_2_output(monkeypatch, capsys):
responses = iter(['mbox-short.txt'])
monkeypatch.setattr('builtins.input', lambda msg: next(responses))
exercise_10_2()
captured_output = capsys.readouterr()
assert captured_output.out.strip()
in ['04 3\n06 1\n07 1\n09 2\n10 3\n11 6\n14 1\n15 2\n16 4\n17 2\n18 1\n19 1'],
"Incorrect output.
Expected output found in README.md"
E AssertionError: Incorrect output. Expected output found in README.md
E assert "('04', '3')('06', '1')('07', '1')('09', '2')('10', '3')
('11', '6')('14', '1')('15', '2')('16', '4')('17', '2')('18', '1')('19', '1')"
in ['04 3\n06 1\n07 1\n09 2\n10 3\n11 6\n14 1\n15 2\n16 4\n17 2\n18 1\n19 1']
E + where "('04', '3')('06', '1')('07', '1')('09', '2')('10', '3')
('11', '6')('14', '1')('15', '2')('16', '4')('17', '2')('18', '1')('19', '1')"
= <built-in method strip of str object at 0x101f8faa0>()
E + where <built-in method strip of str object at 0x101f8faa0>
= "('04', '3')('06', '1')('07', '1')('09', '2')('10', '3')('11', '6')('14', '1')
('15', '2')('16', '4')('17', '2')('18', '1')('19', '1')\n".strip
E + where "('04', '3')('06', '1')('07', '1')('09', '2')('10', '3')
('11', '6')('14', '1')('15', '2')('16', '4')('17', '2')('18', '1')('19', '1')\n"
= CaptureResult(out="('04', '3')('06', '1')('07', '1')('09', '2')('10', '3')
('11', '6' ('14', '1')('15', '2')('16', '4')('17', '2')
('18', '1')('19', '1')\n", err='').out
assignment_test.py:13: AssertionError
==================================== short test summary info ===================
FAILED assignment_test.py::test_exercise_10_2_output - AssertionError: Incorrect
output. Expected output found in README.md
['09:14:16']
['18:10:48']
['16:10:39']
['15:46:24']
['15:03:18']
['14:50:18']
['11:37:30']
['11:35:08']
['11:12:37']
['11:11:52']
['11:11:03']
['11:10:22']
['10:38:42']
['10:17:43']
['10:04:14']
['09:05:31']
['07:02:32']
['06:08:27']
['04:49:08']
['04:33:44']
['04:07:34']
['19:51:21']
['17:18:23']
['17:07:00']
['16:34:40']
['16:29:07']
['16:23:48']
You're printing a tuple rather than the tuple's contents.
from collections import defaultdict
with open("mbox-short.txt") as txt:
d = defaultdict(int)
for line in txt:
if "From " in line:
tokens = line.split()
try:
hh, _, _ = tokens[5].split(":")
d[hh] += 1
except (IndexError, ValueError):
print(f"Malformed line -> {line}")
for t in sorted(d.items()):
print(*t)
There are various assumptions made in this code based on what's given in the question. For example, "From " in line is highly dubious. Not sure what the intent is here. I suspect that relevant lines should either start with "From " or a specific token should be equal to "From"