python visualization altair parallel-coordinates

Empty plots when trying to adapt parallel coordinates example to my data


I'm trying to reproduce the Parallel Coordinates example in Altair, but unfortunately I can't adapt it so that it works for my data. When I run the code below, the plots show up empty without any lines. Could you please provide a pre-defined structure (perhaps with some explanation for beginners like me) so that we can adapt this code to our own goals? Thanks.

from sklearn import datasets
# NOTE(review): `alt` is used below but is not imported in this snippet --
# presumably `import altair as alt` was run earlier; confirm.

# Load the wine dataset as a DataFrame and drop two of its columns.
data_wine = datasets.load_wine (as_frame = True).frame
new_data = data_wine.drop (['proline', 'magnesium'], axis = 1)
# Melt to long format: afterwards the frame's columns are `index`, `target`,
# `variable` (the former column name) and `value` (the measurement).
new_data = new_data.reset_index().melt(id_vars = ['index', 'target'])
# Shared base chart: every layer below reuses this data and these transforms.
base = alt.Chart(
    new_data
).transform_window(
    # Window field named `index`, computed with count().
    index="count()"
).transform_fold(
    # NOTE(review): the frame was already melted above, so the columns listed
    # here are no longer columns of `new_data` at this point.
    #[alcohol   malic_acid  ash alcalinity_of_ash   magnesium   total_phenols   flavanoids  nonflavanoid_phenols    proanthocyanins color_intensity hue od280/od315_of_diluted_wines    "proline    "]
    ["alcohol","malic_acid","ash","alcalinity_of_ash","total_phenols","flavanoids","nonflavanoid_phenols","proanthocyanins","color_intensity","hue","od280/od315_of_diluted_wines"]
).transform_joinaggregate(
    # Attach the min and max of `value` to every row, grouped by `variable`.
    min="min(value)",
    max="max(value)",
    groupby=["variable"]
).transform_calculate(
    # NOTE(review): this expression subtracts `min` from `datum.variable`
    # (the column-name field after the melt), not from `datum.value`.
    norm_val="(datum.variable - datum.min) / (datum.max - datum.min)",
    mid="(datum.min + datum.max) / 2"
).properties(width=600, height=300)

# One line per `index` value, coloured by `target`, with `variable` on x and
# the computed `norm_val` on y (the y axis itself is hidden).
lines = base.mark_line(opacity=0.3).encode(
    alt.Color ('target:N'),
    alt.Detail ('index:N'),
    x='variable:N',
    y=alt.Y('norm_val:Q', axis=None),
    #tooltip=["petalLength:N", "petalWidth:N", "sepalLength:N", "sepalWidth:N"]
)

# Light grey rule marks positioned by `variable` on x.
rules = base.mark_rule(
    color="#ccc", tooltip=None
).encode(
    x="variable:N",
    detail="count():Q",
)

def ytick(yvalue, field):
    """Return a layer holding a text label and a tick mark at a fixed y value.

    Both marks share one encoding built on the module-level ``base`` chart:
    x is the ``variable`` field, y is the constant ``yvalue``, and the label
    text is ``min(<field>)``.

    Args:
        yvalue: Constant passed to ``alt.value`` for the y channel.
        field: Name of the field whose ``min(...)`` aggregate is shown as text.

    Returns:
        The ``alt.layer`` of the text mark and the tick mark.
    """
    tick_base = base.encode(
        x='variable:N',
        y=alt.value(yvalue),
        text=f"min({field}):Q",
    )
    label = tick_base.mark_text(baseline="middle", align="right", dx=-5, tooltip=None)
    tick = tick_base.mark_tick(size=8, color="#ccc", orient="horizontal", tooltip=None)
    return alt.layer(label, tick)

# Stack the lines, the rules and three tick/label layers into one chart.
# The tick layers sit at constant y values 0, 150 and 300 and are labelled with
# the `max`, `mid` and `min` fields; `base` sets the chart height to 300.
# This is left as a bare trailing expression so a notebook renders the chart.
alt.layer(
    lines, rules, ytick(0, "max"), ytick(150, "mid"), ytick(300, "min")
).configure_axisX(
    domain=False, labelAngle=0, tickColor="#ccc", title=None
).configure_view(
    stroke=None
)

Solution

  • Your plots are not showing up because your input data does not have the same structure as in the example you are following. You have melted your wide data frame to long format in pandas. This is the same operation that Altair's transform_fold performs, so in your example you are trying to do it twice. Below I have removed the manual pandas melt and changed the variable names back to the ones automatically assigned by transform_fold (key and value):

    from sklearn import datasets
    import altair as alt
    
    # Keep the frame in wide format: one column per measurement plus `target`.
    data_wine = datasets.load_wine(as_frame=True).frame
    new_data = data_wine.drop(columns=['proline', 'magnesium'])

    # Columns that become the parallel axes; transform_fold turns them into
    # `key` (column name) / `value` (measurement) pairs.
    axis_columns = [
        "alcohol",
        "malic_acid",
        "ash",
        "alcalinity_of_ash",
        "total_phenols",
        "flavanoids",
        "nonflavanoid_phenols",
        "proanthocyanins",
        "color_intensity",
        "hue",
        "od280/od315_of_diluted_wines",
    ]

    # Shared base chart: every layer reuses this data and these transforms.
    base = (
        alt.Chart(new_data)
        # Window field `index`, used below as the detail channel of the lines.
        .transform_window(index="count()")
        .transform_fold(axis_columns)
        # Attach each key's min and max to all of its rows.
        .transform_joinaggregate(min="min(value)", max="max(value)", groupby=["key"])
        # Rescale every value to its key's own min-max range, and compute the
        # midpoint used for the middle tick label.
        .transform_calculate(
            norm_val="(datum.value - datum.min) / (datum.max - datum.min)",
            mid="(datum.min + datum.max) / 2",
        )
        .properties(width=1200, height=300)
    )

    # Light grey rule marks positioned by `key` on x.
    rules = base.mark_rule(color="#ccc", tooltip=None).encode(
        x="key:N",
        detail="count():Q",
    )

    # One line per `index` value, coloured by `target`; the y axis is hidden.
    lines = base.mark_line(opacity=0.3).encode(
        x='key:N',
        y=alt.Y('norm_val:Q', axis=None),
        color=alt.Color('target:N'),
        detail=alt.Detail('index:N'),
    )
    
    def ytick(yvalue, field):
        """Return a layer holding a text label and a tick mark at a fixed y value.

        Both marks share one encoding built on the module-level ``base`` chart:
        x is the ``key`` field, y is the constant ``yvalue``, and the label
        text is ``min(<field>)``.

        Args:
            yvalue: Constant passed to ``alt.value`` for the y channel.
            field: Name of the field whose ``min(...)`` aggregate is shown as text.

        Returns:
            The ``alt.layer`` of the text mark and the tick mark.
        """
        tick_base = base.encode(
            x='key:N',
            y=alt.value(yvalue),
            text=f"min({field}):Q",
        )
        label = tick_base.mark_text(baseline="middle", align="right", dx=-5, tooltip=None)
        tick = tick_base.mark_tick(size=8, color="#ccc", orient="horizontal", tooltip=None)
        return alt.layer(label, tick)
    
    # Stack the lines, the rules and three tick/label layers into one chart.
    # The tick layers sit at constant y values 0, 150 and 300 and are labelled
    # with the `max`, `mid` and `min` fields; `base` sets the chart height to 300.
    # This is left as a bare trailing expression so a notebook renders the chart.
    alt.layer(
        lines, rules, ytick(0, "max"), ytick(150, "mid"), ytick(300, "min")
    ).configure_axisX(
        domain=False, labelAngle=0, tickColor="#ccc", title=None
    ).configure_view(
        stroke=None
    )
    

    enter image description here

    You could create a simpler parallel coordinates plot like this if you are OK with not having a separate y-axis for each column in the data frame:

    from sklearn import datasets
    import altair as alt
    
    data = datasets.load_wine(as_frame=True).frame

    # Columns drawn as positions along the shared x axis.
    num_cols = [
        "alcohol",
        "malic_acid",
        "ash",
        "alcalinity_of_ash",
        "total_phenols",
        "flavanoids",
        "nonflavanoid_phenols",
        "proanthocyanins",
        "color_intensity",
        "hue",
        "od280/od315_of_diluted_wines",
    ]

    # Min-max rescale each column. You could skip this, but it would compress
    # the y-axis range for columns with smaller absolute values.
    col_min = data[num_cols].min()
    col_max = data[num_cols].max()
    data[num_cols] = (data[num_cols] - col_min) / (col_max - col_min)

    # All columns share a single y axis here; the fold produces `key`/`value`
    # pairs and `index` keeps each original row together as one line.
    (
        alt.Chart(data)
        .transform_window(index='count()')
        .transform_fold(num_cols)
        .mark_line()
        .encode(
            alt.X('key:O', title=None, scale=alt.Scale(nice=False, padding=0.05)),
            alt.Y('value:Q', title=None),
            alt.Color('target:N', title=None),
            detail='index:N',
        )
        .properties(width=1200)
    )
    

    enter image description here

    If you are using this for exploratory data analysis and don't need to customize the plot a lot, then you can also try out my experimental package `altair_ally` for quickly creating some common exploratory plots:

    from sklearn import datasets
    import altair_ally as aly
    
    
    # Load the wine data and cast the class label to strings before plotting
    # (presumably so it is treated as a category rather than a number -- confirm).
    data_wine = datasets.load_wine(as_frame=True).frame.astype({'target': str})
    aly.parcoord(data_wine, color='target')
    

    enter image description here