python altair

Bars almost disappear when I layer and facet charts


import numpy as np
import pandas as pd
import altair as alt

# Fixed seed so the generated sample data is reproducible.
np.random.seed(0)

model_keys = ['M1', 'M2']
data_keys = ['D1', 'D2']
scene_keys = ['S1', 'S2']
layer_keys = ['L1', 'L2']

# One record per sampled value. For every (scene, model, dataset, layer)
# combination, 10 batches of 10 uniform values scaled into [0, 0.1) are drawn.
# The loop nesting order (scene outermost, batch innermost) fixes the order
# in which the random generator is consumed.
records = [
    (value, model, data, layer, scene)
    for scene in scene_keys
    for model in model_keys
    for data in data_keys
    for layer in layer_keys
    for _ in range(10)
    for value in np.random.rand(10) / 10
]

# Split the records into one list per column.
ys, models, dataset, layers, scenes = (list(column) for column in zip(*records))

# Long-format table: one row per sample, with its configuration labels.
df = pd.DataFrame({
    'Y': ys,
    'Model': models,
    'Dataset': dataset,
    'Layer': layers,
    'Scenes': scenes,
})

# Legend labels for the opacity channel, rewritten per dataset.
opacity_legend = alt.Legend(
    labelExpr="datum.label == 'D1' ? 'D1 - transparent' : 'D2 - full'"
)

# Opacity encodes the dataset; the scale domain is pinned to the order D1, D2.
dataset_opacity = alt.Opacity(
    "Dataset:N",
    scale=alt.Scale(domain=['D1', 'D2']),
    legend=opacity_legend,
)

# Grouped bars of mean(Y): one group per scene, offset by dataset inside each
# group, faceted into a grid with Layer as columns and Model as rows.
bars = (
    alt.Chart(df, width=100, height=90)
    .mark_bar(tooltip=True)
    .encode(
        x=alt.X("Scenes:N"),
        y=alt.Y("mean(Y):Q"),
        color=alt.Color("Scenes:N"),
        opacity=dataset_opacity,
        xOffset=alt.XOffset("Dataset:N"),
        column=alt.Column('Layer:N'),
        row=alt.Row("Model:N"),
    )
)

# Write the chart to an HTML file.
bars.save('test.html')

This script first generates some random data in which each configuration is determined by four values: model, dataset, layer, and scene. It then stores the data in a dataframe and builds a chart from it. This works fine and gives me this:

enter image description here

But I need to add error bars and text, and this is where things go wrong. First, I need to remove `row` and `column` from the chart, or I can't layer it. Then I make an error bar chart and a text chart, layer them, and facet the result by row and column again.

# Layered variant: bars, error bars and text labels are built as separate
# charts, layered, and only then faceted by Layer (columns) and Model (rows).
# Reported result: with this configuration the bars render extremely thin.

# Grouped bars of mean(Y) per scene, offset by dataset within each scene.
# The chart size is fixed at width=100, height=90.
bars = alt.Chart(df, width=100, height=90).mark_bar(tooltip=True).encode(
    x=alt.X("Scenes:N"),
    y=alt.Y("mean(Y):Q"),
    color=alt.Color("Scenes:N"),
    opacity=alt.Opacity(
        "Dataset:N",
        scale=alt.Scale(
            domain=['D1', 'D2'],
        ),
        legend=alt.Legend(
            labelExpr="datum.label == 'D1' ? 'D1 - transparent' : 'D2 - full'"
        ),
    ),
    xOffset=alt.XOffset("Dataset:N"),
    # The row/column encodings are dropped here; the faceting is applied to
    # the layered chart below instead.
    # column=alt.Column('Layer:N'),
    # row=alt.Row("Model:N")
)

# Error bars over the raw Y values (extent='ci'), positioned by scene.
# NOTE(review): unlike `bars`, this layer has no xOffset on Dataset, so it is
# positioned per scene rather than per dataset bar - confirm this is intended.
error_bars = alt.Chart(df).mark_errorbar(extent='ci').encode(
    x=alt.X('Scenes:N'),
    y=alt.Y('Y:Q'),
)

# Text labels showing mean(Y) with one decimal place, positioned by scene.
text = alt.Chart(df).mark_text(align='center',
    baseline='line-bottom',
    color='black',
    dy=-5, # y-shift
).encode(
    x=alt.X('Scenes:N'),
    y=alt.Y('mean(Y):Q'),
    text=alt.Text('mean(Y):Q', format='.1f'),
)

# Layer the three charts, facet the result into the Layer x Model grid, and
# give each facet its own x scale.
combined = alt.layer(bars, error_bars, text).facet(
    column=alt.Column('Layer:N'),
    row=alt.Row("Model:N"),
    spacing={"row": 0, "column": 15},
).resolve_scale(x='independent')

# Write the layered chart to an HTML file.
combined.save('test.html')

The aggregation works, but the bars suddenly become extremely thin. How can I fix it?

enter image description here


Solution

  • This is not caused by the error bars; it comes from using `facet` instead of the `row` and `column` encodings. This may be a bug, but there is an easy workaround: if you set the width as a step (`alt.Step`) instead of a fixed size, it works fine. Sharing the x scale also works, but I'm sure there are situations where that doesn't make sense.

    enter image description here

    # Encoding channels for the grouped bars: mean(Y) per scene, coloured by
    # scene, with opacity and horizontal offset both driven by the dataset.
    # Faceting by Layer/Model is applied after layering, not here.
    bar_channels = dict(
        x=alt.X("Scenes:N"),
        y=alt.Y("mean(Y):Q"),
        color=alt.Color("Scenes:N"),
        opacity=alt.Opacity(
            "Dataset:N",
            scale=alt.Scale(domain=['D1', 'D2']),
            legend=alt.Legend(
                labelExpr="datum.label == 'D1' ? 'D1 - transparent' : 'D2 - full'"
            ),
        ),
        xOffset=alt.XOffset("Dataset:N"),
    )

    # The workaround: the width is given as a step (alt.Step(20)) instead of
    # a fixed size.
    bars = (
        alt.Chart(df, width=alt.Step(20), height=90)
        .mark_bar(tooltip=True)
        .encode(**bar_channels)
    )

    # Error bars over the raw Y values (extent='ci'), positioned by scene.
    error_bars = (
        alt.Chart(df)
        .mark_errorbar(extent='ci')
        .encode(x=alt.X('Scenes:N'), y=alt.Y('Y:Q'))
    )

    # Text labels showing mean(Y) with one decimal place.
    label_style = dict(
        align='center',
        baseline='line-bottom',
        color='black',
        dy=-5,  # y-shift
    )
    text = (
        alt.Chart(df)
        .mark_text(**label_style)
        .encode(
            x=alt.X('Scenes:N'),
            y=alt.Y('mean(Y):Q'),
            text=alt.Text('mean(Y):Q', format='.1f'),
        )
    )

    # Layer first, then facet into the Layer x Model grid, then give each
    # facet its own x scale.
    layered = alt.layer(bars, error_bars, text)
    faceted = layered.facet(
        column=alt.Column('Layer:N'),
        row=alt.Row("Model:N"),
        spacing={"row": 0, "column": 15},
    )
    combined = faceted.resolve_scale(x='independent')

    # Bare expression so a notebook displays the chart.
    combined