First, I tried writing directly to a blob path, but that didn't work. Then I tried writing to a temp directory and moving the file to the required directory, which also failed. I am looking for a way to write Excel files with multiple sheets to Azure Blob Storage.
import os
import shutil

import pandas as pd
from openpyxl.styles import PatternFill

# Swap the trailing token of the old file name for the one taken from attribute_files[0]
filename = os.path.join(arg_dict['out_dir'], old_attribute_file_path.replace(old_attribute_file_path.split('/')[-1].split('-')[-1].split('.')[0], attribute_files[0].split('-')[1]))
temp_file_name = os.path.join(TMP_PATH, old_attribute_file_path.replace(old_attribute_file_path.split('/')[-1].split('-')[-1].split('.')[0], attribute_files[0].split('-')[1]))

# Yellow fill used for highlighting cells
fill_color = PatternFill(start_color='FFFF00', end_color='FFFF00', fill_type='solid')

# Write DataFrames to Excel, one sheet each
with pd.ExcelWriter(temp_file_name, engine='openpyxl') as writer:
    df1.to_excel(writer, index=False, sheet_name='Sheet1')
    df2.to_excel(writer, index=False, sheet_name='Sheet2')
    # Load the workbook
    workbook = writer.book
    # Save the workbook (redundant: the context manager saves on exit)
    workbook.save(temp_file_name)

shutil.move(temp_file_name, filename)
The error that I get:
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-6a81eac8-0226-477b-9715-070566214b43/lib/python3.10/site-packages/openpyxl/writer/excel.py:294, in save_workbook(workbook, filename)
292 workbook.properties.modified = datetime.datetime.utcnow()
293 writer = ExcelWriter(workbook, archive)
--> 294 writer.save()
295 return True
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-6a81eac8-0226-477b-9715-070566214b43/lib/python3.10/site-packages/openpyxl/writer/excel.py:275, in ExcelWriter.save(self)
273 def save(self):
274 """Write data into the archive."""
--> 275 self.write_data()
276 self._archive.close()
File /local_disk0/.ephemeral_nfs/envs/pythonEnv-6a81eac8-0226-477b-9715-070566214b43/lib/python3.10/site-packages/openpyxl/writer/excel.py:60, in ExcelWriter.write_data(self)
57 archive = self._archive
59 props = ExtendedProperties()
---> 60 archive.writestr(ARC_APP, tostring(props.to_tree()))
62 archive.writestr(ARC_CORE, tostring(self.workbook.properties.to_tree()))
63 if self.workbook.loaded_theme:
File /usr/lib/python3.10/zipfile.py:1816, in ZipFile.writestr(self, zinfo_or_arcname, data, compress_type, compresslevel)
1814 zinfo.file_size = len(data) # Uncompressed size
1815 with self._lock:
-> 1816 with self.open(zinfo, mode='w') as dest:
1817 dest.write(data)
File /usr/lib/python3.10/zipfile.py:1182, in _ZipWriteFile.close(self)
1180 self._fileobj.seek(self._zinfo.header_offset)
1181 self._fileobj.write(self._zinfo.FileHeader(self._zip64))
-> 1182 self._fileobj.seek(self._zipfile.start_dir)
1184 # Successfully written: Add file to our caches
1185 self._zipfile.filelist.append(self._zinfo)
OSError: [Errno 95] Operation not supported
openpyxl and xlsxwriter both write through the regular local file API, and the zip writer's seek call is not supported on blob-backed mounts, which is the OSError: [Errno 95] Operation not supported in your traceback. Instead, you can use the spark-excel plugin to write Excel files directly to blob storage.
You can install it at the cluster level as a Maven library (coordinate com.crealytics:spark-excel_2.12:<version>, matched to your cluster's Scala and Spark versions) from the cluster's Libraries tab.
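If you would rather script the install, here is a minimal sketch using the Databricks Libraries REST API (POST /api/2.0/libraries/install); the host, token, cluster ID, and version below are placeholders you must fill in.

import requests

# Install the spark-excel Maven package on a running cluster.
# <databricks-instance>, <token>, <cluster_id>, and <version> are placeholders.
resp = requests.post(
    "https://<databricks-instance>/api/2.0/libraries/install",
    headers={"Authorization": "Bearer <token>"},
    json={
        "cluster_id": "<cluster_id>",
        "libraries": [{"maven": {"coordinates": "com.crealytics:spark-excel_2.12:<version>"}}],
    },
)
resp.raise_for_status()  # raises if the install request was rejected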
And your code will look like this. First, get your storage access key from a secret scope and set it as a Spark config.
spark.conf.set(
    "fs.azure.account.key.<storage_name>.dfs.core.windows.net",
    dbutils.secrets.get(scope="<scope_name>", key="<access_key>"))
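If your data starts out as pandas DataFrames, as in the question, convert them to Spark DataFrames first; df1 and df2 below refer to the question's frames:

# Convert the pandas DataFrames into Spark DataFrames
spark_Df1 = spark.createDataFrame(df1)
spark_Df2 = spark.createDataFrame(df2)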
Next, set the path and write your Spark DataFrames to the same file, each to a different worksheet; with mode("append") the second write adds its sheet to the existing file instead of overwriting it.
path = "abfss://<container_name>@<storage_name>.dfs.core.windows.net/testdir/test.xlsx"
spark_Df1.write.format("com.crealytics.spark.excel")\
.option("header", "true")\
.option("dataAddress", "'My Sheet1'!A1")\
.mode("append")\
.save(path)
spark_Df2.write.format("com.crealytics.spark.excel")\
.option("header", "true")\
.option("dataAddress", "'My Sheet2'!A1")\
.mode("append")\
.save(path)
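To sanity-check the result, you can read one of the sheets back with the same connector (using the same path variable as above):

# Read one worksheet back to verify the write succeeded
check_df = (spark.read.format("com.crealytics.spark.excel")
    .option("header", "true")
    .option("dataAddress", "'My Sheet1'!A1")
    .load(path))
check_df.show(5)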