I'm trying to read out data from multiple csv files in a folder, selecting specific column, adding the file name as an additional column and finally writing this to an existing excel.
The print output does what I'd like to do, but I don't seem to get the part working, where the data are attached to the excel sheet.
import requests
import os
import pandas as pd
import time
import xlwt
import glob
files = glob.glob("*.csv")
writer = pd.ExcelWriter('output.xls', engine='xlsxwriter')
writer.close()
for i in files:
df=pd.read_csv(i, usecols = ['column1', 'column2'])
df['Filename Column'] = i.split(".")[0]
df.to_csv(i.split(".")[0]+".csv")
print(df)
df_combined = pd.concat([df])
reader = pd.read_excel(r'output.xls')
df_combined.to_excel(writer, index=False, startrow=len(reader)+1)
The Excel remains empty, when I do it like that.
This code is what I've come up with.
import glob
import pandas as pd
from typing import List
if __name__ == "__main__":
final_df: pd.DataFrame = pd.read_excel("dataset.xlsx", engine="openpyxl")
files: List = glob.glob("*.csv")
for file in files:
added_df: pd.DataFrame = pd.read_csv(file, usecols=["column1", "column3"])
added_df["Filename"] = file.split(".")[0]
final_df: pd.DataFrame = pd.concat([final_df, added_df])
final_df.to_excel("dataset.xlsx", index=False)