Can someone please explain to me why there is a 'b' in front of each word and how to get rid of it? The script returns something like this:
word= b'yesterday,' , count = 3
current_word = {}
current_count = 0
text = "https://raw.githubusercontent.com/KseniaGiansar/pythonProject2_text/master/yesterday.txt"
request = urllib.request.urlopen(text)
each_word = []
words = None
count = 1
same_words = {}
word = []

# Collect every whitespace-separated token from the response into one list.
# Iterating the response yields bytes lines, so each token is a bytes object.
for line in request:
    for word in line.split():
        each_word.append(word)

# Tally occurrences case-insensitively, keyed by the lower-cased token.
for words in each_word:
    key = words.lower()
    same_words[key] = same_words.get(key, 0) + 1

# Report each distinct token together with how often it was seen.
for each, total in same_words.items():
    print("word = ", each, ", count = ", total)
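The `b` prefix indicates that each value of the variable `words` is a bytes object.
`urllib.request.urlopen()` returns a response object, and iterating over it yields each line as bytes rather than as a string.
To fix this, use the `.decode()` method to convert each line from bytes to a string before splitting it and appending the words to the list: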
for raw_line in request:
    # Each line arrives as bytes; decode it to str first so that splitting
    # produces text words rather than bytes objects.
    text_line = raw_line.decode()
    each_word.extend(text_line.split())