I am converting an xlsm file to csv using the openpyxl library. I have the problem that when I try to pass the list of lists to the csv python returns the following error:
Traceback (most recent call last): File "test.py", line 21, in listIntoCsv(finalList) File "test.py", line 16, in listIntoCsv write.writerows(list) UnicodeEncodeError: 'ascii' codec can't encode character u'\xfa' in position 20: ordinal not in range(128)
These are 2 example of 1 list inside the final list:
[
[u'0@jordilazo.com', u'SZHK@jordilazo.es, NFUW@jordilazo.es',
datetime.datetime(2022, 7, 18, 10, 7, 16),
1L, '0', 1L, 2L, 'NO',
None, 'test@jordilazo.es',
'TEST@jordilazo.es', None,
None, False, False, None,None,
False, False, False, None, None,
True, 'SI', 'N/A',3182L, 0L, None,
None, None, '#N/A', 'RE: N< >LWWM',
u'a9e5bbbb497', u'Com: \xd1<GR A', None]...
]
I am executing the code with python2. I have tried to apply different solutions so that the code looks like this:
from openpyxl import load_workbook
import csv
import codec
excelFile = load_workbook('test.xlsm', data_only=True)
currentSheet = excelFile.worksheets[0]
def iter_rows(currentSheet):
for row in currentSheet.iter_rows():
yield [cell.value for cell in row]
def listIntoCsv(list):
with codecs.open('test','w',encoding='UTF-8') as f:
write = csv.writer(f)
write.writerows(list)
finalList = list(iter_rows(currentSheet))
print(finalList)
listIntoCsv(finalList)
In order to change the type of each element inside the list we have to change them manually by changing their type to string due to python 2.7 does not support unicode format. So the solution is:
from types import NoneType
import unicodedata
from openpyxl import load_workbook
import csv
from datetime import datetime
import os
def dateTimeToUnixTIme(dateTime):
dataTimeObject = datetime.datetime.strptime(dateTime, "%Y-%m-%d
%H:%M:%S.%f")
unixTime = time.mktime(dataTimeObject.timetuple()) +
(dataTimeObject.microsecond / 1000000.0)
return unixTime
def iterRows(currentSheet):
for row in currentSheet.iter_rows():
yield [cell.value for cell in row]
def listIntoCsv(list):
with open('excel.csv','w') as f:
write = csv.writer(f)
write.writerows(list)
def decodeList(list):
for x,y in enumerate(list):
for j,element in enumerate(y):
if isinstance(element,unicode):
element = unicodedata.normalize('NFKD',
element).encode('ascii', 'ignore')
if (", ") in element and '@' in element:
element = element.replace(' ','')
element = element.split(",")
list[x][j] = element
list[x][j] = element
elif isinstance(element,date):
element = element.strftime('%Y-%m-%d %H:%M:%S.%f')
element = dateTimeToUnixTIme(element)
list[x][j] = str(element)
elif isinstance(element,(long,int,float)):
element = str(element)
list[x][j] = element
elif isinstance(element,NoneType):
element = str(element).replace('None','Nada')
list[x][j] = element
elif isinstance(element,bool):
element = str(element)
list[x][j] = element
elif isinstance(element,str):
element = str(element)
list[x][j] = element
else:
list[x][j] = element
return list
ubicationExcel = 'bin/mailList/Investigacion_DLP_enmascarado.xlsm'
excelFile = load_workbook(ubicationExcel, data_only=True)
currentSheet = excelFile.worksheets[0]
dirtyList = list(iterRows(currentSheet))
finalList = encodeList(dirtyList)
listIntoCsv(finalList)