I have a data frame I'm trying to plot as a stripplot using Seaborn. The data structure is:
Group | Value |
---|---|
"1" | 3.1 |
"2" | 1.2 |
... | ... |
where the 'Group' values are strings and the 'Value' values are floats.
I want to assign a color to any value less than a threshold value and I've tried assigning an array of colors for each point in the plot, but when I run this I get the error: "ValueError: 'c' argument has 37 elements, which is inconsistent with 'x' and 'y' with size 10"
I'm not sure how to troubleshoot this because I do not know where the size of 10 is coming from. The length of both `data["Group"]` and `data["Value"]` is 37. The number of unique groups, i.e. `len(set(data["Group"]))`, is 4, so smaller than my colors list, but not 10.
The code I currently have is:
import pandas as pd
import numpy as np
import seaborn
from matplotlib import pyplot as plt
# Values strictly below this cutoff are the ones meant to be drawn in red.
THRESHOLD = 0.5
# Minimal reproducible data: nine rows spread over three string-valued groups.
data = pd.DataFrame({
"Group": ["1","1","1","2","3","2","2","3","3"],
"Value": [0.3, 1.2, 4.2, 5.1, 0.1, 0.2, 3.2, 4.1, 0.2]
})
# One entry per row (nine in total): "r" where Value < THRESHOLD, "#444444" otherwise.
colors = np.where(data["Value"] < THRESHOLD, "r", "#444444")
# This call raises the ValueError quoted below: the whole nine-element array is
# forwarded as `color`, while (per the traceback) the underlying scatter call
# receives only a three-point subset of the rows.
seaborn.stripplot(
data = data,
x = "Group",
y = "Value",
color = colors
)
Edited to provide a fully working example. The error produced in this example is: "ValueError: 'c' argument has 9 elements, which is inconsistent with 'x' and 'y' with size 3."
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[7], line 11
4 data = pd.DataFrame({
5 "Group": ["1","1","1","2","3","2","2","3","3"],
6 "Value": [0.3, 1.2, 4.2, 5.1, 0.1, 0.2, 3.2, 4.1, 0.2]
7 })
9 colors = np.where(data["Value"] < THRESHOLD, "r", "#444444")
---> 11 sns.stripplot(
12 data = data,
13 x = "Group",
14 y = "Value",
15 color = colors
16 )
File ~\anaconda3\envs\py11\Lib\site-packages\seaborn\categorical.py:2537, in stripplot(data, x, y, hue, order, hue_order, jitter, dodge, orient, color, palette, size, edgecolor, linewidth, hue_norm, native_scale, formatter, legend, ax, **kwargs)
2529 size = kwargs.get("s", size)
2531 kwargs.update(dict(
2532 s=size ** 2,
2533 edgecolor=edgecolor,
2534 linewidth=linewidth)
2535 )
-> 2537 p.plot_strips(
2538 jitter=jitter,
2539 dodge=dodge,
2540 color=color,
2541 edgecolor=edgecolor,
2542 plot_kws=kwargs,
2543 )
2545 # XXX this happens inside a plotting method in the distribution plots
2546 # but maybe it's better out here? Alternatively, we have an open issue
2547 # suggesting that _attach could add default axes labels, which seems smart.
2548 p._add_axis_labels(ax)
File ~\anaconda3\envs\py11\Lib\site-packages\seaborn\categorical.py:299, in _CategoricalPlotterNew.plot_strips(self, jitter, dodge, color, edgecolor, plot_kws)
296 sub_data[var] = np.power(10, sub_data[var])
298 ax = self._get_axes(sub_vars)
--> 299 points = ax.scatter(sub_data["x"], sub_data["y"], color=color, **plot_kws)
301 if "hue" in self.variables:
302 points.set_facecolors(self._hue_map(sub_data["hue"]))
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\__init__.py:1442, in _preprocess_data.<locals>.inner(ax, data, *args, **kwargs)
1439 @functools.wraps(func)
1440 def inner(ax, *args, data=None, **kwargs):
1441 if data is None:
-> 1442 return func(ax, *map(sanitize_sequence, args), **kwargs)
1444 bound = new_sig.bind(ax, *args, **kwargs)
1445 auto_label = (bound.arguments.get(label_namer)
1446 or bound.kwargs.get(label_namer))
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\axes\_axes.py:4602, in Axes.scatter(self, x, y, s, c, marker, cmap, norm, vmin, vmax, alpha, linewidths, edgecolors, plotnonfinite, **kwargs)
4599 if edgecolors is None:
4600 orig_edgecolor = kwargs.get('edgecolor', None)
4601 c, colors, edgecolors = \
-> 4602 self._parse_scatter_color_args(
4603 c, edgecolors, kwargs, x.size,
4604 get_next_color_func=self._get_patches_for_fill.get_next_color)
4606 if plotnonfinite and colors is None:
4607 c = np.ma.masked_invalid(c)
File ~\anaconda3\envs\py11\Lib\site-packages\matplotlib\axes\_axes.py:4455, in Axes._parse_scatter_color_args(c, edgecolors, kwargs, xsize, get_next_color_func)
4451 else:
4452 if len(colors) not in (0, 1, xsize):
4453 # NB: remember that a single color is also acceptable.
4454 # Besides *colors* will be an empty array if c == 'none'.
-> 4455 raise invalid_shape_exception(len(colors), xsize)
4456 else:
4457 colors = None # use cmap, norm after collection is created
ValueError: 'c' argument has 9 elements, which is inconsistent with 'x' and 'y' with size 3.
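`color` expects a single color for the whole plot, not one color per point. As the traceback shows, seaborn's `plot_strips` calls `ax.scatter` on a subset of the data (here the 3 points of one group), so your 9-element array is checked against a size of 3 — that is where the unexpected size in the error comes from. You should use `hue` instead of `color` to mark the two groups, and `palette` to assign the desired colors.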
import seaborn as sns
import pandas as pd
import numpy as np
# Reproducible demo data: ten random values assigned to random integer groups.
# The generator is seeded so the plot looks the same on every run.
rng = np.random.default_rng(0)
data = pd.DataFrame({
    "Value": rng.standard_normal(10),
    "Group": rng.integers(0, 10, size=10),
})

THRESHOLD = 0.1
BELOW = "less than threshold"
ABOVE = "Bigger than threshold"

# One label per row saying which side of the threshold the value falls on.
# These are category labels for `hue`, not colors.
labels = np.where(data["Value"] < THRESHOLD, BELOW, ABOVE)

sns.stripplot(
    data=data,
    x="Group",
    y="Value",
    hue=labels,
    # Map each label to its color explicitly. A plain list such as
    # ['red', 'blue'] is matched to the hue levels in the order they first
    # appear in the data, so the colors could end up swapped (or a lone
    # above-threshold level could be drawn red) depending on the data.
    palette={BELOW: "red", ABOVE: "blue"},
)