pythonexcelpandasdataframestyleframe

Colour specific cells from two columns that don't match, using python pandas style.where (or otherwise) and export to excel


I am looking to colour specific cells from two columns that don't match, but would like to use it with python pandas style.where and export in excel using openpyxl.

My code so far:

df = pd.DataFrame({
    'config_dummy1': ["dummytext"] * 100,
    'config_size_x': ["textstring"] * 100,
    'config_size_y': ["textstring"] * 100,
    'config_dummy2': ["dummytext"] * 100
})
df.at[50, 'config_size_x'] = "xandydontmatch"
df.at[99, 'config_size_y'] = "xandydontmatch"
print(df)
df.style.where(
    df['config_size_x'] != df['config_size_y'],
    'color: #ffffff; background-color: #ba3018',
    other=''
).to_excel('styled.xlsx', engine='openpyxl')

I am stuck, as it produces an error:

Traceback (most recent call last):
  File "python-match-csv.py", line 205, in <module>
    main2()
  File "python-match-csv.py", line 131, in main2
    ).to_excel('styled.xlsx', engine='openpyxl')
  File "F:\Python36\lib\site-packages\pandas\io\formats\style.py", line 175, in to_excel
    engine=engine)
  File "F:\Python36\lib\site-packages\pandas\io\formats\excel.py", line 652, in write
    freeze_panes=freeze_panes)
  File "F:\Python36\lib\site-packages\pandas\io\excel.py", line 1390, in write_cells
    for cell in cells:
  File "F:\Python36\lib\site-packages\pandas\io\formats\excel.py", line 617, in get_formatted_cells
    self._format_body()):
  File "F:\Python36\lib\site-packages\pandas\io\formats\excel.py", line 529, in _format_regular_rows
    for cell in self._generate_body(coloffset):
  File "F:\Python36\lib\site-packages\pandas\io\formats\excel.py", line 601, in _generate_body
    styles = self.styler._compute().ctx
  File "F:\Python36\lib\site-packages\pandas\io\formats\style.py", line 514, in _compute
    r = func(self)(*args, **kwargs)
  File "F:\Python36\lib\site-packages\pandas\io\formats\style.py", line 604, in _applymap
    result = self.data.loc[subset].applymap(func)
  File "F:\Python36\lib\site-packages\pandas\core\frame.py", line 6072, in applymap
    return self.apply(infer)
  File "F:\Python36\lib\site-packages\pandas\core\frame.py", line 6014, in apply
    return op.get_result()
  File "F:\Python36\lib\site-packages\pandas\core\apply.py", line 318, in get_result
    return super(FrameRowApply, self).get_result()
  File "F:\Python36\lib\site-packages\pandas\core\apply.py", line 142, in get_result
    return self.apply_standard()
  File "F:\Python36\lib\site-packages\pandas\core\apply.py", line 248, in apply_standard
    self.apply_series_generator()
  File "F:\Python36\lib\site-packages\pandas\core\apply.py", line 277, in apply_series_generator
    results[i] = self.f(v)
  File "F:\Python36\lib\site-packages\pandas\core\frame.py", line 6070, in infer
    return lib.map_infer(x.astype(object).values, func)
  File "pandas/_libs/src\inference.pyx", line 1472, in pandas._libs.lib.map_infer
  File "F:\Python36\lib\site-packages\pandas\io\formats\style.py", line 671, in <lambda>
    return self.applymap(lambda val: value if cond(val) else other,
TypeError: ("'Series' object is not callable", 'occurred at index config_dummy1')

TypeError: ("'Series' object is not callable", 'occurred at index config_dummy1'

I am open to suggestions other than .where(), I also tried to do this with .apply() but failed.

Note: the column index position is not fixed, it could be config_size_x, config_dummy1, config_dummy2, config_size_y or any other combination

Note 2: using windows and python 3.6 if it matters


Solution

  • You can create DataFrame of styles with apply:

    def color(x):
        c1 = 'color: #ffffff; background-color: #ba3018'
        m = x['config_size_x'] != x['config_size_y']
        df1 = pd.DataFrame('', index=x.index, columns=x.columns)
        df1.loc[m, ['config_size_x', 'config_size_y']] = c1
        return df1
    
    df.style.apply(color, axis=None)
    

    General solution:

    df = pd.DataFrame({
        'config_dummy1': ["dummytext"] * 10,
        'a_y': ["a"] * 10,
        'config_size_x': ["textstring"] * 10,
        'config_size_y': ["textstring"] * 10,
        'config_dummy2': ["dummytext"] * 10,
        'a_x': ["a"] * 10
    })
    df.at[5, 'config_size_x'] = "xandydontmatch"
    df.at[9, 'config_size_y'] = "xandydontmatch"
    df.at[0, 'a_x'] = "xandydontmatch"
    df.at[3, 'a_y'] = "xandydontmatch"
    print(df)
    
    def color(x):
        c1 = 'color: #ffffff; background-color: #ba3018'
        df1 = pd.DataFrame('', index=x.index, columns=x.columns)
    
        #select only columns ends with _x and _y and sorting
        cols = sorted(x.filter(regex='_x$|_y$').columns)
        #loop by pairs and assign style by mask
        for i, j in zip(cols[::2],cols[1::2]):
            #pairs columns 
            #print (i, j)
            m = x[i] != x[j]
            df1.loc[m, [i, j]] = c1
        return df1
    
    df.style.apply(color, axis=None).to_excel('styled.xlsx', engine='openpyxl')