I'm trying to get a barplot to rotate its x-axis labels by 45° to make them readable (as is, they overlap).
`len(genero)` is 7, and `len(filmes_por_genero)` is 20.
I'm using a MovieLens dataset and making a graph counting the number of movies in each individual genre. Here's my code as of now:
import seaborn as sns
import matplotlib.pyplot as plt
# Apply seaborn's "whitegrid" style to the plots drawn below.
sns.set_style("whitegrid")
# Movies per genre: one indicator column per '|'-separated genre, summed per
# column and sorted from most to least frequent.
filmes_por_genero = filmes["generos"].str.get_dummies('|').sum().sort_values(ascending=False)
# NOTE(review): the x labels come from a different object (filmes_com_media)
# than the y values (filmes_por_genero). The question text reports lengths of
# 7 and 20 respectively, which is consistent with the
# "Grouper and axis must be same length" ValueError quoted below.
# filmes_por_genero.index is presumably what was intended -- confirm.
genero = filmes_com_media.index
# NOTE(review): plt.figure() returns a Figure, not an Axes, so `chart` has no
# set_xticklabels / get_xticklabels methods. The Axes is the return value of
# sns.barplot, which this script discards.
chart = plt.figure(figsize=(16,8))
sns.barplot(x=genero,
y=filmes_por_genero.values,
# Requests len(filmes_por_genero) + 4 colours from the "BuGn_r" palette.
palette=sns.color_palette("BuGn_r", n_colors=len(filmes_por_genero) + 4)
)
# Intended to rotate the x tick labels by 45 degrees, right-aligned; called on
# the Figure (see note above), so it cannot work as written.
chart.set_xticklabels(
chart.get_xticklabels(),
rotation=45,
horizontalalignment='right'
)
Here's the full error:
/usr/local/lib/python3.6/dist-packages/pandas/core/groupby/grouper.py in get_grouper(obj, key, axis, level, sort, observed, mutated, validate)
623 in_axis=in_axis,
624 )
--> 625 if not isinstance(gpr, Grouping)
626 else gpr
627 )
/usr/local/lib/python3.6/dist-packages/pandas/core/groupby/grouper.py in __init__(self, index, grouper, obj, name, level, sort, observed, in_axis)
254 self.name = name
255 self.level = level
--> 256 self.grouper = _convert_grouper(index, grouper)
257 self.all_grouper = None
258 self.index = index
/usr/local/lib/python3.6/dist-packages/pandas/core/groupby/grouper.py in _convert_grouper(axis, grouper)
653 elif isinstance(grouper, (list, Series, Index, np.ndarray)):
654 if len(grouper) != len(axis):
--> 655 raise ValueError("Grouper and axis must be same length")
656 return grouper
657 else:
ValueError: Grouper and axis must be same length
The following uses the explicit `Axes` interface with the seaborn axes-level functions.
To rotate the `xticklabels`, the easiest option is `ax.tick_params(axis='x', labelrotation=45)`, but `horizontalalignment` / `ha` can't be set.
`ax.set_xticks` can be used with `ax.get_xticks` and `ax.get_xticklabels`.
`ax.set_xticklabels` works with `ax.get_xticklabels`, but results in `UserWarning: set_ticklabels() should only be used with a fixed number of ticks, i.e. after set_ticks() or using a FixedLocator.`
Since `sns.barplot` is a categorical plot, and `native_scale=False` by default, the xticks are 0 indexed, so the xticklabels can be set, and the warning ignored.
Tested in python v3.12.0, pandas v2.1.2, matplotlib v3.8.1, seaborn v0.13.0.
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the MovieLens movies table from its CSV file and preview the first rows.
df = pd.read_csv(filepath_or_buffer='ml-25m/movies.csv')
print(df.head(n=5))
movieId title genres
0 1 Toy Story (1995) Adventure|Animation|Children|Comedy|Fantasy
1 2 Jumanji (1995) Adventure|Children|Fantasy
2 3 Grumpier Old Men (1995) Comedy|Romance
3 4 Waiting to Exhale (1995) Comedy|Drama|Romance
4 5 Father of the Bride Part II (1995) Comedy
# Turn each '|'-separated genres string into a list, then give every list
# element its own row (renumbering the index from 0), so that each row pairs
# one movie with one genre.
df = (
    df
    .assign(genres=df['genres'].str.split('|'))
    .explode('genres', ignore_index=True)
)
print(df.head())
movieId title genres
0 1 Toy Story (1995) Adventure
1 1 Toy Story (1995) Animation
2 1 Toy Story (1995) Children
3 1 Toy Story (1995) Comedy
4 1 Toy Story (1995) Fantasy
# Count the rows per genre (most frequent first) and expose the result as a
# two-column frame: 'genres' and 'count'.
# The column names are set explicitly because a bare
# `value_counts().reset_index()` only produces 'genres'/'count' on
# pandas >= 2.0; older versions produce 'index'/'genres', which would break
# the x='genres', y='count' plotting calls that consume `gc`.
gc = df['genres'].value_counts().rename_axis('genres').reset_index(name='count')
print(gc)
genres count
0 Drama 25606
1 Comedy 16870
2 Thriller 8654
3 Romance 7719
4 Action 7348
5 Horror 5989
6 Documentary 5605
7 Crime 5319
8 (no genres listed) 5062
9 Adventure 4145
10 Sci-Fi 3595
11 Children 2935
12 Animation 2929
13 Mystery 2925
14 Fantasy 2731
15 War 1874
16 Western 1399
17 Musical 1054
18 Film-Noir 353
19 IMAX 195
sns.barplot
# Explicit interface: create the Figure and Axes first, then pass the Axes to
# seaborn so the tick labels can be adjusted on it afterwards.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(
    data=gc,
    x='genres',
    y='count',
    hue='genres',
    palette=sns.color_palette("BuGn_r", n_colors=len(gc)),
    ec='k',
    legend=False,
    ax=ax,
)

# Rotate the x tick labels by 45 degrees.
ax.tick_params(axis='x', labelrotation=45)

# Alternatives that also allow setting the horizontal alignment:
# ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
# ax.set_xticks(ticks=ax.get_xticks(), labels=ax.get_xticklabels(), rotation=45, ha='right')

plt.show()
# Implicit interface: open a figure, let seaborn draw on the current Axes, and
# keep the Axes it returns for the tick-label adjustment.
plt.figure(figsize=(12, 6))
ax = sns.barplot(
    data=gc,
    x='genres',
    y='count',
    hue='genres',
    palette=sns.color_palette("BuGn_r", n_colors=len(gc)),
    ec='k',
    legend=False,
)

# Rotate the x tick labels by 45 degrees.
ax.tick_params(axis='x', labelrotation=45)

# Alternatives that also allow setting the horizontal alignment:
# ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
# ax.set_xticks(ticks=ax.get_xticks(), labels=ax.get_xticklabels(), rotation=45, ha='right')

plt.show()
sns.countplot
Use `sns.countplot` to skip using `.value_counts()` if the plot order doesn't matter.
To order `countplot`, `order=df.genres.value_counts().index` must be used, so `countplot` doesn't really save you from needing `.value_counts()`, if a descending order is desired.
fig, ax = plt.subplots(figsize=(12, 6))
# Let seaborn count the rows per genre itself, drawing on the existing Axes.
sns.countplot(
    data=df,
    x='genres',
    ax=ax,
)

# Rotate the x tick labels by 45 degrees.
ax.tick_params(axis='x', labelrotation=45)

# Alternatives that also allow setting the horizontal alignment:
# ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
# ax.set_xticks(ticks=ax.get_xticks(), labels=ax.get_xticklabels(), rotation=45, ha='right')

plt.show()
pandas.DataFrame.plot
`.value_counts` can be plotted directly, and the `rot=` parameter can be used to rotate the xticklabels.
ax = df.genres.value_counts().plot(kind='bar', rot=45, width=0.85, ec='k', figsize=(12, 6))
# Horizontal variant: genres go on the y-axis and counts on the x-axis.
plt.figure(figsize=(6, 4))
ax = sns.barplot(
    data=gc,
    y='genres',
    x='count',
    orient='h',
    hue='genres',
    palette=sns.color_palette("BuGn_r", n_colors=len(gc)),
    ec='k',
    legend=False,
)