I'm writing a function that draw line charts based on data in a worksheet.
In this function, the flag "secondary" checks if the chart will be plotted with a secondary axis, following the example here.
Everything works fine if there are two or more series plotted on the same chart. But if there's just one series, then somehow each time segment is assigned a different color. Why does openpyxl do this and what's the fix?
import openpyxl
from openpyxl import Workbook, load_workbook
from openpyxl.styles import Alignment, NamedStyle
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.utils import get_column_letter
from openpyxl.chart import LineChart, Reference
from openpyxl.chart.axis import ChartLines, TextAxis
from openpyxl.drawing.line import LineProperties
from openpyxl.drawing.text import ParagraphProperties, CharacterProperties
from openpyxl.chart.layout import Layout, ManualLayout
from openpyxl.chart.shapes import GraphicalProperties
def produce_line_chart(ws_data, charts_sheet, series_list, secondary=False):
"""
Creates a customized line chart based on data from a worksheet and prepares it for placement on a charts sheet.
Args:
ws_data (Worksheet): The worksheet containing the data to be charted.
charts_sheet (Worksheet): The worksheet where the chart will be placed.
series_list (list): A list of strings where the first item is the chart title and subsequent items are the series names to be charted.
secondary (bool): If True, disables major gridlines on both axes. Defaults to False.
Returns:
tuple: A tuple containing the created chart object and the suggested position for placing the chart on the charts sheet.
"""
# Create a line chart
chart = LineChart()
# Set the title of the chart to the first item in series_list
chart.title = series_list[0]
# Increase the chart title's font size to 20pt
chart.title.tx.rich.paragraphs[0].pPr = ParagraphProperties(
defRPr=CharacterProperties(sz=2000) # Font size 20pt
)
# Configure the appearance of the axes
chart.x_axis = TextAxis(tickMarkSkip=4)
chart.x_axis.spPr = GraphicalProperties(ln=LineProperties(solidFill="D9D9D9"))
chart.y_axis.spPr = GraphicalProperties(ln=LineProperties(solidFill="D9D9D9"))
# Add or remove major gridlines based on the 'secondary' flag
if secondary:
chart.x_axis.majorGridlines = None
chart.y_axis.majorGridlines = None
else:
chart.x_axis.majorGridlines = ChartLines()
chart.x_axis.majorGridlines.spPr = GraphicalProperties(
ln=LineProperties(solidFill="D9D9D9")
)
chart.y_axis.majorGridlines = ChartLines()
chart.y_axis.majorGridlines.spPr = GraphicalProperties(
ln=LineProperties(solidFill="D9D9D9")
)
# Set tick marks on the axes
chart.x_axis.majorTickMark = "out"
chart.y_axis.majorTickMark = "cross"
# Ensure axes are displayed
chart.x_axis.delete = False
chart.y_axis.delete = False
# Set the legend position at the bottom of the chart
chart.legend.position = "b"
# Adjust the layout to ensure enough space for the legend
chart.layout = Layout(
manualLayout=ManualLayout(
x=0, # x position of the plot area
y=0, # y position of the plot area
h=0.85, # height of the plot area
w=0.9, # width of the plot area
)
)
# Scale the chart by adjusting its height and width
chart.height = chart.height * 1.71
chart.width = chart.width * 2.03
# Get the first row of the worksheet data to identify the series columns
first_row = next(ws_data.iter_rows(min_row=1, max_row=1, values_only=True), [])
# Loop through each series name in series_list (excluding the title)
num_rows = ws_data.max_row
for series in series_list[1:]:
for idx, cell_value in enumerate(first_row, start=1):
if cell_value == series:
col = idx
break
# Create a reference to the data for the series and add it to the chart
series_to_chart = Reference(
ws_data, min_col=col, min_row=1, max_col=col, max_row=num_rows
)
chart.add_data(series_to_chart, titles_from_data=True)
# Ensure the series line is not smoothed
chart.series[-1].smooth = False
# Set the categories (x-axis labels) based on the first column of data (excluding the header)
categories = Reference(ws_data, min_col=1, min_row=2, max_row=ws_data.max_row)
chart.set_categories(categories)
# Determine the next available position for the chart on the charts sheet
chart_row = 1 # Start in the first row
for existing_chart in charts_sheet._charts:
existing_chart_row = int("".join(filter(str.isdigit, existing_chart.anchor)))
# Convert the chart height from cm to Excel cell height and add some space
chart_row = max(chart_row, existing_chart_row + int(chart.height * 1.86) + 4)
# Determine the position to add the chart on the charts sheet
chart_position = f"A{chart_row}"
return chart, chart_position
The problem depends on the Version of Microsoft Excel which shows the chart.
There is a setting for varying the colors of same-series data markers in a chart. See: Vary the colors of same-series data markers in a chart. That was false
per default up to Excel 2007
. Then Microsoft
decided to make that setting true
per default. And from Microsoft 365 on it is false
per default again. So you need explicitly set that false
if you do not want it in any Microsoft Excel version.
For openpyxl see LineChart.varyColors. The default is not to set that. Then the Excel version default get used.
Minimized use case:
from datetime import date
from openpyxl import Workbook
from openpyxl.chart import (
LineChart,
Reference,
)
wb = Workbook()
ws = wb.active
rows = [
['Date', 'Ser 1'],
[date(2015,9, 1), 40],
[date(2015,9, 2), 40],
[date(2015,9, 3), 50],
[date(2015,9, 4), 30],
[date(2015,9, 5), 25],
[date(2015,9, 6), 20],
]
for row in rows:
ws.append(row)
chart = LineChart()
chart.title = "Line Chart"
chart.y_axis.title = 'Value'
chart.x_axis.title = 'Date'
data = Reference(ws, min_col=2, min_row=1, max_col=2, max_row=7)
chart.add_data(data, titles_from_data=True)
dates = Reference(ws, min_col=1, min_row=2, max_row=7)
chart.set_categories(dates)
#chart.varyColors = True
chart.varyColors = False
ws.add_chart(chart, "A10")
wb.save("line.xlsx")
There chart.varyColors = True
will always show each data point in another color. While chart.varyColors = False
will never do this.