I have this df that has three columns A
, B
, C
and I want to color the cell of B
if A
and B
are equal. I also want to export it to excel with the style.
data = [[10, 30, 30], [15, 20, 33], [14, 40, 36]]
df = pd.DataFrame(data, columns=['A', 'B', 'C'])
def highlight(df, color):
return np.where(df['A'] == df['B'], f"color: {color};", None)
df.style.apply(highlight, color='red', subset=['B']).to_excel(os.path.join(".\\data", 'highlighted_data.xlsx'), index=False)
But I get an error of this form:
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
Cell In[4], line 7
5 def highlight_max(df, color):
6 return np.where(df['A'] == df['B'], f"color: {color};", None)
----> 7 df.style.apply(highlight_max, color='red', subset=['A']).to_excel(os.path.join(".\\data", 'highlighted_data.xlsx'), index=False)
8 # df
9 # ------------ ------------
11 def highlight_pred(df, output_dir):
File d:\Apps\envs\cuda11\Lib\site-packages\pandas\io\formats\style.py:537, in Styler.to_excel(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, encoding, inf_rep, verbose, freeze_panes, storage_options)
509 @doc(
510 NDFrame.to_excel,
511 klass="Styler",
(...)
533 storage_options: StorageOptions | None = None,
534 ) -> None:
535 from pandas.io.formats.excel import ExcelFormatter
--> 537 formatter = ExcelFormatter(
538 self,
539 na_rep=na_rep,
540 cols=columns,
541 header=header,
542 float_format=float_format,
543 index=index,
...
--> 417 raise KeyError(key)
418 self._check_indexing_error(key)
419 raise KeyError(key)
KeyError: 'A'
And I have no idea what s going on.
I tried to set axis=1
df.style.apply(highlight, color='red', subset=['A'], axis=1).to_excel(os.path.join(".\\data", 'highlighted_data.xlsx'), index=False)
but with no success.
You can use this:
data = {'A': [1, 2, 3], 'B': [4, 2, 3], 'C': [7, 8, 9], 'D' : [7, 8, 9], 'E': [4, 5, 3]}
df = pd.DataFrame(data)
first_column_of_interest = 'A'
second_column_of_interest = 'B'
def highlight_max(s):
# Only highlight second column of interest
t = pd.Series('', index=s.index)
if s[first_column_of_interest] == s[second_column_of_interest]:
t[second_column_of_interest] = 'background-color: yellow'
return t
#Writes to styled.xlsx
df.style.apply(highlight_max, axis=1).to_excel('styled.xlsx', engine='openpyxl')
If columns C and D are of interest:
first_column_of_interest = 'C'
second_column_of_interest = 'D'