import pandas as pd
concordance_region = pd.DataFrame(
{
"country 1": pd.Series([1, 0], index=["region a", "region b"]),
"country 2": pd.Series([0, 1], index=["region a", "region b"]),
"country 3": pd.Series([0, 1], index=["region a", "region b"]),
}
)
display(concordance_region)
country_index = concordance_region.columns
region_index = concordance_region.index
sector_index = ['sector a','sector b']
country_sector = pd.MultiIndex.from_product([country_index, sector_index], names=["country", "sector"])
region_sector = pd.MultiIndex.from_product([region_index, sector_index], names=["region", "sector"])
concordance_region_expanded = pd.DataFrame([[1,0,0,0,0,0],[0,1,0,0,0,0],[0,0,1,0,1,0],[0,0,0,1,0,1]], index=region_sector, columns=country_sector)
display(concordance_region_expanded)
I want to achieve the above expansion without hard-coding the number.
An option is that:
concordance_region_extended = pd.DataFrame(index=region_sector, columns=country_sector)
for region in region_index:
for sector_1 in sector_index:
for country in country_index:
for sector_2 in sector_index:
if sector_1 == sector_2 and concordance_region.loc[region, country] == 1:
concordance_region_expanded.loc[(region, sector_1),(country, sector_2)] = 1
concordance_region_expanded = concordance_region_expanded.fillna(value=0).infer_objects(copy=False)
concordance_region_expanded
But I think the above code is neither efficient nor elegant.
Any way to solve the above problem?
Code
use np.kron and identity matrix (identity matrix can be created with np.eye.)
import pandas as pd
import numpy as np
# taken from questioner's code
sector_index = ['sector a', 'sector b']
country_sector = pd.MultiIndex.from_product(
[country_index, sector_index], names=["country", "sector"])
region_sector = pd.MultiIndex.from_product(
[region_index, sector_index], names=["region", "sector"])
# start answer
n = len(sector_index)
out = pd.DataFrame(
np.kron(concordance_region.values, np.eye(n)),
index=region_sector, columns=country_sector, dtype='int'
)
out