import numpy as np
import pandas as pd
import altair as alt
# Build a reproducible synthetic dataset. Every configuration is identified by
# four categorical values (scene, model, dataset, layer) and receives
# 10 batches x 10 samples = 100 Y values drawn uniformly from [0, 0.1).
np.random.seed(0)  # fixed seed so the generated values are identical on every run

model_keys = ['M1', 'M2']
data_keys = ['D1', 'D2']
scene_keys = ['S1', 'S2']
layer_keys = ['L1', 'L2']

# Parallel column buffers: index i of each list describes the same sample.
ys = []
models = []
dataset = []
layers = []
scenes = []

# The loop nesting order (scene > model > dataset > layer > batch) determines
# how the seeded random stream is assigned to configurations, so it must not
# be reordered if the exact values are to stay the same.
for sc in scene_keys:
    for m in model_keys:
        for d in data_keys:
            for l in layer_keys:
                for s in range(10):
                    # One batch of 10 samples, scaled from [0, 1) down to [0, 0.1).
                    data_y = list(np.random.rand(10) / 10)
                    ys += data_y
                    # Repeat the labels so every sample carries its configuration.
                    scenes += [sc] * len(data_y)
                    models += [m] * len(data_y)
                    dataset += [d] * len(data_y)
                    layers += [l] * len(data_y)

# Long-format table: one row per sample, one column per label plus the value.
df = pd.DataFrame({
    'Y': ys,
    'Model': models,
    'Dataset': dataset,
    'Layer': layers,
    'Scenes': scenes},
)
# Baseline chart: grouped bars of mean(Y) per scene, faceted through the
# `row` / `column` encoding channels, then written to test.html.

# Opacity distinguishes the two datasets; the legend labels are rewritten so
# that 'D1' reads 'D1 - transparent' and every other entry reads 'D2 - full'.
dataset_opacity = alt.Opacity(
    "Dataset:N",
    scale=alt.Scale(domain=['D1', 'D2']),
    legend=alt.Legend(
        labelExpr="datum.label == 'D1' ? 'D1 - transparent' : 'D2 - full'"
    ),
)

# All encoding channels collected in one mapping and applied in a single call.
bar_encodings = {
    "x": alt.X("Scenes:N"),
    "y": alt.Y("mean(Y):Q"),
    "color": alt.Color("Scenes:N"),
    "opacity": dataset_opacity,
    # Places the bars of the two datasets side by side within each scene.
    "xOffset": alt.XOffset("Dataset:N"),
    "column": alt.Column('Layer:N'),
    "row": alt.Row("Model:N"),
}

bar_marks = alt.Chart(df, width=100, height=90).mark_bar(tooltip=True)
bars = bar_marks.encode(**bar_encodings)

bars.save('test.html')
This script first generates some random data in which each configuration is determined by four values: model, dataset, layer, and scene. It then stores the data in a DataFrame and draws a faceted bar chart. This works fine and produces the chart I expect.
But I also need to add error bars and text labels, and this is where things go wrong. First, I need to remove the `row` and `column` encodings from the chart, or I can't layer it. Then I create an error-bar chart and a text chart, layer them with the bars, and facet the result by `row` and `column` again.
# Layered variant of the grouped bar chart: bars + error bars + text labels,
# faceted afterwards with .facet() instead of row/column encodings.
# Known symptom of this exact spec (it is the reproduction of the problem):
# the bars render extremely thin.

# Layer 1: bars of mean(Y) per scene, with a fixed 100x90 size.
bars = alt.Chart(df, width=100, height=90).mark_bar(tooltip=True).encode(
x=alt.X("Scenes:N"),
y=alt.Y("mean(Y):Q"),
color=alt.Color("Scenes:N"),
# Opacity separates the two datasets; the legend text is rewritten so that
# 'D1' reads 'D1 - transparent' and any other entry reads 'D2 - full'.
opacity=alt.Opacity(
"Dataset:N",
scale=alt.Scale(
domain=['D1', 'D2'],
),
legend=alt.Legend(
labelExpr="datum.label == 'D1' ? 'D1 - transparent' : 'D2 - full'"
),
),
xOffset=alt.XOffset("Dataset:N"),
# The row/column encodings are deliberately disabled here: the chart has to be
# un-faceted before it can be layered; faceting is re-applied on the layer below.
# column=alt.Column('Layer:N'),
# row=alt.Row("Model:N")
)
# Layer 2: error bars over the raw Y values, using extent='ci'.
# NOTE(review): no xOffset is encoded here, so this presumably draws one error
# bar per scene rather than one per dataset bar — confirm that is intended.
error_bars = alt.Chart(df).mark_errorbar(extent='ci').encode(
x=alt.X('Scenes:N'),
y=alt.Y('Y:Q'),
)
# Layer 3: the mean value as a black text label, shifted 5 px via dy.
# NOTE(review): Y is generated in [0, 0.1), so format='.1f' can only print
# 0.0 or 0.1 — a finer format may be what is wanted; confirm.
text = alt.Chart(df).mark_text(align='center',
baseline='line-bottom',
color='black',
dy=-5, # y-shift
).encode(
x=alt.X('Scenes:N'),
y=alt.Y('mean(Y):Q'),
text=alt.Text('mean(Y):Q', format='.1f'),
)
# Stack the three layers, then facet the layered chart by Layer (columns) and
# Model (rows), with an independent x scale per facet.
combined = alt.layer(bars, error_bars, text).facet(
column=alt.Column('Layer:N'),
row=alt.Row("Model:N"),
spacing={"row": 0, "column": 15},
).resolve_scale(x='independent')
combined.save('test.html')
The aggregation works, but the bars suddenly become extremely thin. How can I fix it?
This is not caused by the error bars; it comes from using `facet` instead of the `row` and `column` encodings. It's possible that this is a bug, but there is an easy enough workaround: if you set the width as a step instead of a fixed size, it works fine. Sharing the X scale also works, but I'm sure there are situations where that doesn't make sense.
# Workaround version of the layered + faceted chart: bars, error bars and
# text labels are layered and then faceted with .facet().
# The only change that matters is on the first line: the bar chart's width is
# given as a step (alt.Step(20)) instead of a fixed size, which avoids the
# extremely thin bars seen when a fixed width is combined with .facet().

# Layer 1: bars of mean(Y) per scene; width is a step, height stays fixed at 90.
bars = alt.Chart(df, width=alt.Step(20), height=90).mark_bar(tooltip=True).encode(
x=alt.X("Scenes:N"),
y=alt.Y("mean(Y):Q"),
color=alt.Color("Scenes:N"),
# Opacity separates the two datasets; the legend text is rewritten so that
# 'D1' reads 'D1 - transparent' and any other entry reads 'D2 - full'.
opacity=alt.Opacity(
"Dataset:N",
scale=alt.Scale(
domain=['D1', 'D2'],
),
legend=alt.Legend(
labelExpr="datum.label == 'D1' ? 'D1 - transparent' : 'D2 - full'"
),
),
xOffset=alt.XOffset("Dataset:N"),
# Faceting is applied on the layered chart below, so the row/column encodings
# stay disabled on this individual layer.
# column=alt.Column('Layer:N'),
# row=alt.Row("Model:N")
)
# Layer 2: error bars over the raw Y values, using extent='ci'.
# NOTE(review): no xOffset is encoded here, so this presumably draws one error
# bar per scene rather than one per dataset bar — confirm that is intended.
error_bars = alt.Chart(df).mark_errorbar(extent='ci').encode(
x=alt.X('Scenes:N'),
y=alt.Y('Y:Q'),
)
# Layer 3: the mean value as a black text label, shifted 5 px via dy.
text = alt.Chart(df).mark_text(align='center',
baseline='line-bottom',
color='black',
dy=-5, # y-shift
).encode(
x=alt.X('Scenes:N'),
y=alt.Y('mean(Y):Q'),
text=alt.Text('mean(Y):Q', format='.1f'),
)
# Stack the three layers, then facet by Layer (columns) and Model (rows) with
# an independent x scale per facet.
combined = alt.layer(bars, error_bars, text).facet(
column=alt.Column('Layer:N'),
row=alt.Row("Model:N"),
spacing={"row": 0, "column": 15},
).resolve_scale(x='independent')
# Bare expression: presumably relies on notebook-style display of the last value.
combined