#Column X contains the suffix of one of V* columns. Need to put set column V{X} to 9 if X > 1.
#But my code created a new column 'VX' instead of updating one of the V* columns
import pandas as pd
df = pd.DataFrame({'EMPLID': [12, 13, 14, 15, 16, 17, 18],
'V1': [2,3,4,50,6,7,8],
'V2': [3,3,3,3,3,3,3],
'V3': [7,15,8,9,10,11,12],
'X': [2,3,1,3,3,1,2]
})
# Expected output:
# EMPLID V1 V2 V3 X
# 12 2 9 7 2
# 13 3 3 9 3
# 14 4 3 8 1
# 15 50 3 9 3
# 16 6 3 9 3
# 17 7 3 11 1
# 18 8 9 12 2
My code created a new column 'VX' instead of updating one of the V* columns:
df.loc[(df['X'] > 1), f"V{'X'}"] = 9
Any suggestion is appreciated. Thank you.
# your dataframe
df = pd.DataFrame({'EMPLID': [12, 13, 14, 15, 16, 17, 18],
'V1': [2,3,4,50,6,7,8],
'V2': [3,3,3,3,3,3,3],
'V3': [7,15,8,9,10,11,12],
'X': [2,3,1,3,3,1,2]
})
First, we get the columns names that we want to change and their indices on the original dataframe.
# column name
x = df['X'][(df['X'] > 1)]
# column names mapped to your scenario
columns = [f'V{v}' for v in x]
# desired indexes
positions = x.index.values
#Then we convert the column names to indices and use these indices to update the positions matching the conditions.
column_indices = [df.columns.get_loc(col) for col in columns]
Now, we can use two approaches here.
Convert the dataframe to numpy array, change the desired positions all at once and change the result back to a dataframe.
import numpy as np
# the original dataframe column names
column_names = df.columns
# convert the dataframe to numpy (this will 'remove' the column names)
df_array = df.values
# put the columns back (axis=0 will stack the columns at the top of the array)
df_array = np.concatenate([[column_names], df_array], axis=0)
# position+1 because when using pandas to get the row index, we ignore the columns (which would have index 0)
df_array[positions+1, column_indices] = 9
# convert the result back to a dataframe
df = pd.DataFrame(df_array[1:], columns=column_names)
print(df)
Output:
EMPLID V1 V2 V3 X
0 12 2 9 7 2
1 13 3 3 9 3
2 14 4 3 8 1
3 15 50 3 9 3
4 16 6 3 9 3
5 17 7 3 11 1
6 18 8 9 12 2
The easiest way would be just to loop over the rows and columns, changing one value at a time.
for row, column in zip(positions, column_indices):
df.iat[row,column] = 9
If your dataframe is small, the vectorized approach won't have as much of an advantage over the for loop.