I have two datasets/dataframes df1
and df2
, I want to generate df3
by finding the difference between numeric columns of df2 and df1's column_X.
#### copy and paste below to generate df1 and df2
import pandas as pd
from random import uniform
import numpy as np
# generation of df1
data = np.random.uniform(15,40, size=(60, 2))
df1 = pd.DataFrame(data, columns=['column_A','column_B'])
df1['column_X'] = df1.mean(axis=1)
df1
# generation of df2
data = np.random.uniform(10.5,32.8, size=(60, 30))
df2 = pd.DataFrame(data, columns=['column_1','column_2','column_3','column_4','column_5',
'column_6','column_7','column_8','column_9','column_10',
'column_11','column_12','column_13','column_14','column_15',
'column_16','column_17','column_18','column_19','column_20',
'column_21','column_22','column_23','column_24','column_25',
'column_26','column_27','column_28','column_29','column_30',])
df2["Group"] = pd.DataFrame(np.repeat(['A','B','C'], 20, axis=0))
# make "Group" column the first column
col = df2.pop('Group')
df2.insert(0, 'Group', col)
df2
I want to generate df3 by substracting df2's numeric columns (column_1 to column_30) from df1's Column_X while retaining the "Group" column
# Step 1: create an empty df3 and then append df2['Group']
df3 = pd.DataFrame()
# substract "column_X from each numeric column
df3['col_1X_sub'] = df2['column_1'] - df1['column_X']
df3['col_2X_sub'] = df2['column_2'] - df1['column_X']
df3['col_3X_sub'] = df2['column_3'] - df1['column_X']
.
.
.
df3['col_30X_sub'] = df2['column_30'] - df1['column_X']
final df3 should look something like this for all 30 columns
Copy and then broadcast-subtract:
import pandas as pd
import numpy as np
np.random.seed(0)
a, b = ab = np.random.uniform(low=15, high=40, size=(2, 60))
df1 = pd.DataFrame({
'column_A': a,
'column_B': b,
'column_X': ab.mean(axis=0),
})
df2 = pd.DataFrame(
data=np.random.uniform(low=10.5, high=32.8, size=(60, 30)),
columns=[f'column_{i}' for i in range(1, 31)],
)
df2.insert(
loc=0, column='Group',
value=np.repeat(['A','B','C'], 20, axis=0),
)
df3 = df2.copy()
df3.iloc[:, 1:] -= df1['column_X'].values[:, np.newaxis]