Example dataframe:
name col1 col2 col3
bob bird 78 1000
alice cat 55 500,600,700
rob dog 333 20,30
Desired DataFrame, which adds one row per value whenever col3 contains a comma-delimited string:
name col1 col2 col3
bob bird 78 1000
alice cat 55 500
alice cat 55 600
alice cat 55 700
rob dog 333 20
rob dog 333 30
Any suggestions are appreciated. Thanks!
import pandas as pd
import numpy as np
class DataFrameExpander:
    """
    Expand a DataFrame by adding one row per value when the cells of a
    column contain comma-separated strings.
    """

    def __init__(self, dataframe):
        """
        Store the DataFrame to expand.

        :param dataframe: The source DataFrame (it is never modified)
        """
        self.dataframe = dataframe

    @staticmethod
    def _split_cell(value):
        """
        Split one cell into a list of trimmed pieces.

        Only strings are split. Anything else (numbers, None, NaN) is
        returned unchanged so that missing values stay missing instead of
        becoming the literal text 'nan' or 'None'.
        """
        if not isinstance(value, str):
            return value
        return [piece.strip() for piece in value.split(',')]

    def expand_column(self, column_name):
        """
        Return a copy of the DataFrame in which every comma-separated value
        of the given column gets its own row; the other columns are repeated.

        :param column_name: The name of the column to split comma-separated values
        :return: The new extended DataFrame, with a fresh 0..n-1 index
        :raises KeyError: If column_name is not a column of the DataFrame
        """
        # Work on a copy so the caller's DataFrame keeps its original column.
        df = self.dataframe.copy()
        df[column_name] = df[column_name].apply(self._split_cell)
        expanded_df = df.explode(column_name, ignore_index=True)
        # Convert the values in the column back to a numeric type if possible.
        # to_numeric(errors='ignore') is deprecated in recent pandas, so the
        # "leave it alone if it does not parse" fallback is spelled out here.
        try:
            expanded_df[column_name] = pd.to_numeric(expanded_df[column_name])
        except (ValueError, TypeError):
            pass  # not a purely numeric column: keep the split values as-is
        return expanded_df
if __name__ == "__main__":
    # Demo input: the sample table, with several values packed into col3.
    sample = pd.DataFrame(
        {
            'name': ['bob', 'alice', 'rob'],
            'col1': ['bird', 'cat', 'dog'],
            'col2': [78, 55, 333],
            'col3': ['1000', '500,600,700', '20,30'],
        }
    )
    # Print the table with one row per comma-separated value of col3.
    print(DataFrameExpander(sample).expand_column('col3'))
name col1 col2 col3
0 bob bird 78 1000
1 alice cat 55 500
2 alice cat 55 600
3 alice cat 55 700
4 rob dog 333 20
5 rob dog 333 30