I have a Dataframe which looks like this:
Period start time Segment Name KPI_1 KPI_2
04.13.2023 AI4301A 51.49 3.85
04.13.2023 AI4301B 193.79 1.56
04.13.2023 AI4301C 95.69 1.25
04.14.2023 AI4301A 59.47 2.42
04.14.2023 AI4301B 193.11 1.32
04.14.2023 AI4301C 81.90 1.94
04.15.2023 AI4301A 46.73 3.11
04.15.2023 AI4301B 179.33 1.47
04.15.2023 AI4301C 78.95 1.51
I want the Output to be like this without any empty row between the Segment Name Rows & AI4301A:
| KPI_1 | KPI_2 |
Segment Name 04.13.2023 04.14.2023 04.15.2023 04.13.2023 04.14.2023 04.15.2023
AI4301A 51.49 59.47 46.73 3.85 2.42 3.11
AI4301B 193.79 193.11 179.33 1.56 1.32 1.47
AI4301C 95.69 81.90 78.95 1.25 1.94 1.51
I should be able to add as many KPIs, Segment Name and Period start time as I want which gives above similar pattern
What I have tried so far:
df =(df.groupby(['Segment Name', pd.Grouper(freq='D', key='Period start time')]) ['Segment Name','KPI_1','KPI_2'] .sum() .unstack())
I am getting the output as follows :
KPI_1 KPI_2
Segment Name 04.13.2023 04.14.2023 04.15.2023 04.13.2023 04.14.2023 04.15.2023
AI4301A 51.49 59.47 46.73 3.85 2.42 3.11
AI4301B 193.79 193.11 179.33 1.56 1.32 1.47
AI4301C 95.69 81.90 78.95 1.25 1.94 1.51
The Problem is that after applying the above code neither i am not able to remove the blank row after the Header row nor able to perform any operation like merge another dataframe based on Segment Name. Please help Experts
Edit: Adding the Dataframe to be merged:
segmentName ID_1 ID_2
AI4301A AI_05 2741
AI4301B AI_05 2742
AI4301C AI_05 2743
To address your first question (while building on top of @Corralien's answer), you can use this :
#from @Corralien's answer + joining the 2nd dataframe
out = (df1.rename(columns={"KPI_1": "Sum of KPI_1", "KPI_2": "Sum of KPI_2"})
.pivot_table(index="Segment Name", columns="Period start time",
values=["Sum of KPI_1", "Sum of KPI_2"], aggfunc="sum")
.join(df2.set_index("segmentName").set_axis(
pd.MultiIndex.from_product([[''], df2.columns[1:]]), axis=1))
.set_index([("", "ID_1"), ("", "ID_2")], append=True)
.rename_axis(index=lambda x: x[1] if isinstance(x, tuple) else x)
.reset_index()
)
#making/formatting the spreadsheet
with pd.ExcelWriter("output.xlsx", engine="xlsxwriter") as writer:
out.loc[:-1].to_excel(writer, sheet_name="Sheet1") #the shape
(out.style.set_properties(**{"border":"1px solid black"})
.to_excel(writer, sheet_name="Sheet1", startrow=1, header=False)) #the data
workbook = writer.book
worksheet = writer.sheets["Sheet1"]
worksheet.set_column("A:A", None, None, {"hidden": True}) #to hide the index column
lvl0_format = workbook.add_format(
{"bold": True, "border": 1, "bg_color": "#dce6f1", "align": "center"}
)
for tup in [(idx_col, col_name) for idx_col, col_name
in enumerate(out.columns.get_level_values(0))][3::3]:
worksheet.write(0, tup[0]+1, tup[1], lvl0_format)
lvl1_format = workbook.add_format(
{"bold": True, "border": 1, "bg_color": "#dce6f1", "rotation": 90}
)
for idx_col, col_name in enumerate(out.columns.get_level_values(1)[1:]):
worksheet.write(1, idx_col+2, col_name, lvl1_format)
white_format = workbook.add_format({"bg_color": "white"})
sgn_format = workbook.add_format(
{"bold": True, "bg_color": "#124191", "color": "white", "border": 1}
)
ids_format = workbook.add_format(
{
"bold": True, "bg_color": "#0070c0", "color": "white",
"border": 1, "valign": "vcenter", "align": "center"
}
)
for tup in [(idx_col, col_name) for idx_col, col_name
in enumerate(out.columns.get_level_values(0))][:3]:
worksheet.write(0, tup[0]+1, None, white_format)
if tup[1] == "Segment Name":
worksheet.write(1, tup[0]+1, tup[1], sgn_format)
else:
worksheet.write(1, tup[0]+1, tup[1], ids_format)
worksheet.autofit()
Output :