pythonpandasdataframeformatstreamlit

What is the correct order and way to format a dataframe both using colors, and numbers formatting?


I am using python/panda and making a visualization under Streamlit, working under local environment.

I have a dictionary of dataframes, and each dataframe contain text & numbers as strings.

dataframe formatted

Due to the diversity in terms of numbers, I want to format my dataframes to be more user friendly, example: 1 M (million) instead of 1,000,000

I also want to add some conditional formatting by changing the output text color depending on the values: for example green if the ['P/E'] is < 25.

Here I am stuck:

The below code is the one allowing this screenshot, which means the values with color formatting find the errors='coerce' due to non-numeric formatting.

Your help will be appreciated !

    def apply_conditional_formatting(df, column_rules):
        styled_df = df.style
        # Example: Apply green text if P/E < 25
        if 'P/E' in df.columns:
            df['P/E'] = pd.to_numeric(df['P/E'], errors='coerce')
            styled_df = styled_df.map(
                lambda val: 'color: green' if isinstance(val, (int, float)) and val < 25 else '', 
                subset=['P/E']
            )
        if 'Market Cap' in df.columns:
            df['Market Cap'] = pd.to_numeric(df['Market Cap'], errors='coerce')
            styled_df = styled_df.map(
                lambda val: 'color: green' if isinstance(val, (int, float)) and val > 1000000 else '', 
                subset=['Market Cap']
            )
        # Extend the logic to other rules here...
        return styled_df
    
    def apply_formatting_to_dataframes(dataframes, column_rules):
        # Apply conditional formatting to a dictionary of DataFrames.
        styled_dataframes = {}
    
        for section, df in dataframes.items():
            styled_dataframes[section] = apply_conditional_formatting(df, column_rules)
    
        return styled_dataframes

    def convert_to_readable(num):
        if num is None or num == 'N/A':
            return 'N/A'
        if isinstance(num, str):
            try:
                num = float(num)  # Try converting strings to float
            except ValueError:
                return num
    
        if num >= 1_000_000_000 or num <= -1_000_000_000:
            return f'{num / 1_000_000_000:.2f}B'
        elif num >= 1_000_000 or num <= -1_000_000:
            return f'{num / 1_000_000:.2f}M'
        elif num >= 1_000 or num <= -1_000:
            return f'{num / 1_000:.2f}K'
        else:
            return f'{num:.2f}'
    
    def convert_to_readable_dataframes(df):
        # Convert numeric values to a more readable format in the output df
        for column in df.columns:
            df[column] = df[column].apply(lambda x: convert_to_readable(x))
        return df

    for section, df in financial_sections_dataframes.items():
        readable_df = convert_to_readable_dataframes(df)
        formatted_df = apply_conditional_formatting(readable_df, column_rules)
#

Thank you in advance for providing guidance and solution,

example_df = pd.DataFrame({
    "Country": ["Netherlands", "France", "Luxembourg", "France"],
    "Market Cap": [142307622912, 1230484753, 12947592845, 987462847],
    "P/E": [33.66, 21.14, 22.87, 7.45],
    "Price": [131.28, 19.80, 22.76, 0.68],
    "Change": [-0.03, -0.02, -0.01, -0.01],
    "Volume": ["1091234", "326568", "629141", "400476"]
})

# Assuming this would be part of a larger dictionary
dataframes = {
    "Example Section": example_df
}

Solution

  • Pandas differentiates between the display value (what you see in visualization) and the actual value (actual underlying data). We want to store our data as the original raw numbers, and only change the display values via the Styler interface:

    # Restored volume back to ints
    example_df = pd.DataFrame({
        "Country": ["Netherlands", "France", "Luxembourg", "France"],
        "Market Cap": [142307622912, 1230484753, 12947592845, 987462847],
        "P/E": [33.66, 21.14, 22.87, 7.45],
        "Price": [131.28, 19.80, 22.76, 0.68],
        "Change": [-0.03, -0.02, -0.01, -0.01],
        "Volume": [1091234, 326568, 629141, 400476]
    })
    
    def short_form(val):
        if not val:
            return "N/A"
        
        if val > 1e9:
            return f"{val/1e9:.2f}B"
        elif val > 1e6:
            return f"{val/1e6:.2f}M"
        elif val > 1000:
            return f"{val/1000:.2f}K"
    
    def highlight(col, threshold = 25):
        return ["color: green" if val > threshold else "" for val in col]
    
    example_df.style.apply(highlight, subset = "P/E", threshold = 25).format(short_form, subset = "Market Cap").format(precision = 2, subset = ["P/E", "Price", "Change"])
    

    Meanwhile example_df.values still retains the original data, allowing further manipulation:

    array([['Netherlands', 142307622912, 33.66, 131.28, -0.03, 1091234],
           ['France', 1230484753, 21.14, 19.8, -0.02, 326568],
           ['Luxembourg', 12947592845, 22.87, 22.76, -0.01, 629141],
           ['France', 987462847, 7.45, 0.68, -0.01, 400476]], dtype=object)