i have the following code:
import pandas as pd
data = {
'name': ['Comp1', 'Comp1', 'Comp2', 'Comp2', 'Comp3'],
'entity_type': ['type1', 'type1', 'type2', 'type2', 'type3'],
'code': ['code1', 'code2', 'code3', 'code1', 'code2'],
'date': ['2024-01-31', '2024-01-31', '2024-01-29', '2024-01-31', '2024-01-29'],
'value': [10, 10, 100, 10, 200],
'source': [None, None, 'Estimated', None, 'Reported']
}
df = pd.DataFrame(data)
pivot_df = df.pivot(index='date', columns=['name', 'entity_type', 'source', 'code'], values='value').rename_axis([('name', 'entity_type', 'source', 'date')])
df = pivot_df.reset_index()
df
I am having trouble with the following:
Any help would be appreciated.
EDIT - working hack as this data i want to end up as an array that looks exactly in the dataframe to insert into a spreadsheet..this works. In this case however, there would be any meaningful 'columns'..
out = (df.pivot(index='date', columns=['name', 'entity_type', 'source', 'code'], values='value')
.rename_axis([('name', 'entity_type', 'source', 'date')])
.reset_index()
.fillna('')
)
out.columns.names = [None, None, None, None]
columns_df = pd.DataFrame(out.columns.tolist()).T
out = pd.concat([columns_df, pd.DataFrame(out.values)], ignore_index=True)
out
You could (1) reset_index
with drop=True
, (2) set the display.multi_sparse
to False
(with pandas.option_context
) and (3) fillna
with ''
:
df = pd.DataFrame(data)
out = (df.pivot(index='date', columns=['name', 'entity_type', 'source', 'code'],
values='value')
.rename_axis([('name', 'entity_type', 'source', 'date')])
.reset_index(drop=True)
.fillna('')
)
with pd.option_context('display.multi_sparse', False):
print(out)
Output:
name Comp1 Comp1 Comp2 Comp2 Comp3
entity_type type1 type1 type2 type2 type3
source NaN NaN Estimated NaN Reported
code code1 code2 code3 code1 code2
0 100.0 200.0
1 10.0 10.0 10.0
Printing without the index:
out = (df.pivot(index='date', columns=['name', 'entity_type', 'source', 'code'],
values='value')
.fillna('')
)
with pd.option_context('display.multi_sparse', False):
print(out.to_string(index=False))
Output:
Comp1 Comp1 Comp2 Comp2 Comp3
type1 type1 type2 type2 type3
NaN NaN Estimated NaN Reported
code1 code2 code3 code1 code2
100.0 200.0
10.0 10.0 10.0
Hiding the indices:
out = (df.pivot(index='date', columns=['name', 'entity_type', 'source', 'code'],
values='value')
.rename_axis(None)
.rename(lambda x: '')
.fillna('')
)
with pd.option_context('display.multi_sparse', False):
print(out)
Output:
name Comp1 Comp1 Comp2 Comp2 Comp3
entity_type type1 type1 type2 type2 type3
source NaN NaN Estimated NaN Reported
code code1 code2 code3 code1 code2
100.0 200.0
10.0 10.0 10.0
Updated answer: header as rows
out = (df.pivot(index='date', columns=['name', 'entity_type', 'source', 'code'],
values='value')
.rename_axis(index=None, columns=('name', 'entity_type', 'source', 'date'))
.fillna('')
.T.reset_index().T
.reset_index()
)
Output:
index 0 1 2 3 4
0 name Comp1 Comp1 Comp2 Comp2 Comp3
1 entity_type type1 type1 type2 type2 type3
2 source NaN NaN Estimated NaN Reported
3 date code1 code2 code3 code1 code2
4 2024-01-29 100.0 200.0
5 2024-01-31 10.0 10.0 10.0