python matplotlib seaborn stripplot

ValueError when passing array of colors to a stripplot


I have a data frame I'm trying to plot as a stripplot using Seaborn. The data structure is:

Group Value
"1" 3.1
"2" 1.2
... ...

Where 'Group' are strings and 'Value' are floats.

I want to assign a color to any value less than a threshold value and I've tried assigning an array of colors for each point in the plot, but when I run this I get the error: "ValueError: 'c' argument has 37 elements, which is inconsistent with 'x' and 'y' with size 10"

I'm not sure how to troubleshoot this because I do not know where the size of 10 is coming from. The length of both data["Group"] and data["Value"] is 37. The number of unique groups, i.e. len(set(data["Group"])), is 4, which is smaller than my colors list, but not 10.

The code I currently have is:

import pandas as pd
import numpy as np
import seaborn
from matplotlib import pyplot as plt


# Points whose value falls below this cutoff should be drawn in red.
THRESHOLD = 0.5


# Minimal example data: nine rows across three groups (three rows per group).
data = pd.DataFrame({
    "Group": ["1","1","1","2","3","2","2","3","3"],
    "Value": [0.3, 1.2, 4.2, 5.1, 0.1, 0.2, 3.2, 4.1, 0.2]
})

# One color string per row: "r" where Value < THRESHOLD, "#444444" otherwise.
colors = np.where(data["Value"] < THRESHOLD, "r", "#444444")

# This call raises the ValueError shown in the traceback below: the 9-element
# `colors` array reaches ax.scatter together with a subset of only 3 points.
seaborn.stripplot(
    data = data,
    x = "Group",
    y = "Value",
    color = colors
)

Edited to provide a fully working example. The error produced in this example is: "ValueError: 'c' argument has 9 elements, which is inconsistent with 'x' and 'y' with size 3."

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[7], line 11
      4 data = pd.DataFrame({
      5     "Group": ["1","1","1","2","3","2","2","3","3"],
      6     "Value": [0.3, 1.2, 4.2, 5.1, 0.1, 0.2, 3.2, 4.1, 0.2]
      7 })
      9 colors = np.where(data["Value"] < THRESHOLD, "r", "#444444")
---> 11 sns.stripplot(
     12     data = data,
     13     x = "Group",
     14     y = "Value",
     15     color = colors
     16 )

File ~\anaconda3\envs\py11\Lib\site-packages\seaborn\categorical.py:2537, in stripplot(data, x, y, hue, order, hue_order, jitter, dodge, orient, color, palette, size, edgecolor, linewidth, hue_norm, native_scale, formatter, legend, ax, **kwargs)
   2529 size = kwargs.get("s", size)
   2531 kwargs.update(dict(
   2532     s=size ** 2,
   2533     edgecolor=edgecolor,
   2534     linewidth=linewidth)
   2535 )
-> 2537 p.plot_strips(
   2538     jitter=jitter,
   2539     dodge=dodge,
   2540     color=color,
   2541     edgecolor=edgecolor,
   2542     plot_kws=kwargs,
   2543 )
   2545 # XXX this happens inside a plotting method in the distribution plots
   2546 # but maybe it's better out here? Alternatively, we have an open issue
   2547 # suggesting that _attach could add default axes labels, which seems smart.
   2548 p._add_axis_labels(ax)

File ~\anaconda3\envs\py11\Lib\site-packages\seaborn\categorical.py:299, in _CategoricalPlotterNew.plot_strips(self, jitter, dodge, color, edgecolor, plot_kws)
    296         sub_data[var] = np.power(10, sub_data[var])
    298 ax = self._get_axes(sub_vars)
--> 299 points = ax.scatter(sub_data["x"], sub_data["y"], color=color, **plot_kws)
    301 if "hue" in self.variables:
    302     points.set_facecolors(self._hue_map(sub_data["hue"]))

File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\__init__.py:1442, in _preprocess_data.<locals>.inner(ax, data, *args, **kwargs)
   1439 @functools.wraps(func)
   1440 def inner(ax, *args, data=None, **kwargs):
   1441     if data is None:
-> 1442         return func(ax, *map(sanitize_sequence, args), **kwargs)
   1444     bound = new_sig.bind(ax, *args, **kwargs)
   1445     auto_label = (bound.arguments.get(label_namer)
   1446                   or bound.kwargs.get(label_namer))

File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\axes\_axes.py:4602, in Axes.scatter(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, edgecolors, plotnonfinite, **kwargs)
   4599 if edgecolors is None:
   4600     orig_edgecolor = kwargs.get('edgecolor', None)
   4601 c, colors, edgecolors = \
-> 4602     self._parse_scatter_color_args(
   4603         c, edgecolors, kwargs, x.size,
   4604         get_next_color_func=self._get_patches_for_fill.get_next_color)
   4606 if plotnonfinite and colors is None:
   4607     c = np.ma.masked_invalid(c)

File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\axes\_axes.py:4455, in Axes._parse_scatter_color_args(c, edgecolors, kwargs, xsize, get_next_color_func)
   4451     else:
   4452         if len(colors) not in (0, 1, xsize):
   4453             # NB: remember that a single color is also acceptable.
   4454             # Besides *colors* will be an empty array if c == 'none'.
-> 4455             raise invalid_shape_exception(len(colors), xsize)
   4456 else:
   4457     colors = None  # use cmap, norm after collection is created

ValueError: 'c' argument has 9 elements, which is inconsistent with 'x' and 'y' with size 3.

Solution

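  • You should use hue instead of color to mark the two groups, and palette to assign the desired colors. The color parameter expects a single color: as the traceback shows, plot_strips calls ax.scatter separately on subsets of the data, so the full 9-element color array is passed along with only the 3 points of one group, which is where the size mismatch comes from.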

    import seaborn as sns
    import pandas as pd
    import numpy as np
    # Random demo data: 10 values spread over integer groups in the range 0-9.
    df = {'Value': np.random.randn(10),
          'Group': np.random.randint(0, 10, size=(10,))}
    data = pd.DataFrame(df)
    THRESHOLD = 0.1
    BELOW, ABOVE = "less than threshold", "Bigger than threshold"
    # Despite its name, `colors` holds one category label per row; the actual
    # colors are supplied through `palette` below.
    colors = np.where(data["Value"] < THRESHOLD, BELOW, ABOVE)
    sns.stripplot(
        data=data,
        x="Group",
        y="Value",
        hue=colors,
        # Map each label to its color explicitly. A plain list is matched to
        # the hue levels by position, so which label ends up red would depend
        # on the level order seaborn infers from the data.
        palette={BELOW: 'red', ABOVE: 'blue'},
        # Fix the level order so the legend is stable between runs.
        hue_order=[BELOW, ABOVE],
    )