I'm trying to reproduce the Parallel Coordinates example in Altair, but unfortunately I can't adapt it so that it works for me. When I run the code below, the plots show up empty, without any lines. Could you please provide a pre-defined structure (perhaps with some explanation for beginners like me) so that we can adapt this code to our own goals? Thanks.
from sklearn import datasets
# Load the scikit-learn wine dataset as a single pandas DataFrame.
data_wine = datasets.load_wine (as_frame = True).frame
# Leave out two of the columns.
new_data = data_wine.drop (['proline', 'magnesium'], axis = 1)
# Reshape from wide to long format in pandas, keeping 'index' and 'target' as
# identifier columns.
# NOTE(review): the chart built from this frame also calls transform_fold,
# which performs the same wide-to-long reshape, so the reshape is attempted
# twice -- this looks like the reason the plot comes out empty.
new_data = new_data.reset_index().melt(id_vars = ['index', 'target'])
# Shared base chart: every layer below (lines, rules, tick labels) is derived
# from this object, so the transforms listed here apply to all of them.
base = alt.Chart(
new_data
).transform_window(
# Adds an 'index' field computed as a running count().
index="count()"
).transform_fold(
#[alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue od280/od315_of_diluted_wines "proline "]
# NOTE(review): new_data was already melted in pandas, so these wide column
# names presumably no longer exist as columns at this point -- confirm.
["alcohol","malic_acid","ash","alcalinity_of_ash","total_phenols","flavanoids","nonflavanoid_phenols","proanthocyanins","color_intensity","hue","od280/od315_of_diluted_wines"]
).transform_joinaggregate(
# Per-group minimum and maximum of 'value', attached to every row.
min="min(value)",
max="max(value)",
groupby=["variable"]
).transform_calculate(
# Intended as a min-max rescale to the 0..1 range.
# NOTE(review): the numerator uses datum.variable rather than datum.value;
# after the pandas melt 'variable' presumably holds column names, not numbers.
norm_val="(datum.variable - datum.min) / (datum.max - datum.min)",
# Midpoint of each group's range, used for the middle tick label.
mid="(datum.min + datum.max) / 2"
).properties(width=600, height=300)
# Line layer: the 'variable' field goes on x, the rescaled value on y (with the
# shared y axis hidden), colour comes from 'target', and 'index' is used as the
# detail channel.
lines = base.mark_line(opacity=0.3).encode(
alt.Color ('target:N'),
alt.Detail ('index:N'),
x='variable:N',
y=alt.Y('norm_val:Q', axis=None),
#tooltip=["petalLength:N", "petalWidth:N", "sepalLength:N", "sepalWidth:N"]
)
# Rule layer: light grey rule marks positioned by the 'variable' field on x,
# with tooltips switched off.
rules = base.mark_rule(
color="#ccc", tooltip=None
).encode(
x="variable:N",
detail="count():Q",
)
def ytick(yvalue, field):
    """Build one row of tick marks with text labels across all x positions.

    Args:
        yvalue: Constant passed to ``alt.value`` for the y channel. The calls
            in this snippet use 0, 150 and 300 for a chart of height 300.
        field: Name of the field whose ``min(...)`` aggregate is shown as the
            label text (called with "max", "mid" and "min" in this snippet).

    Returns:
        An ``alt.layer`` made of a text mark and a tick mark that share the
        same encoding.
    """
    # Both marks reuse one encoding so the label sits next to its tick.
    scale = base.encode(x='variable:N', y=alt.value(yvalue), text=f"min({field}):Q")
    return alt.layer(
        scale.mark_text(baseline="middle", align="right", dx=-5, tooltip=None),
        scale.mark_tick(size=8, color="#ccc", orient="horizontal", tooltip=None)
    )
# Stack the line layer, the rule layer and three rows of tick labels into one
# chart. The y values 0 / 150 / 300 are paired with the "max" / "mid" / "min"
# fields and correspond to the chart height of 300 set on `base`.
alt.layer(
lines, rules, ytick(0, "max"), ytick(150, "mid"), ytick(300, "min")
).configure_axisX(
domain=False, labelAngle=0, tickColor="#ccc", title=None
).configure_view(
# Remove the border around the chart view.
stroke=None
)
The reason your plots are not showing up is that your input data does not have the same structure as in the example you are following. You have melted your wide data frame in pandas to long format. This is the same functionality performed by the `transform_fold` function in Altair, so in your example you are trying to do this twice. Below I have removed the manual pandas melt and changed the variable names back to the ones automatically assigned by `transform_fold` (`key` and `value`):
from sklearn import datasets
import altair as alt
# Load the wine data as one wide DataFrame and leave out the two columns that
# are not folded into the chart below. The frame stays in wide format: the
# wide-to-long reshape is left to Altair's transform_fold.
data_wine = datasets.load_wine(as_frame=True).frame
new_data = data_wine.drop(columns=["proline", "magnesium"])
# Shared base chart: the line, rule and tick-label layers are all derived from
# it, so every transform below applies to each of them.
base = (
    alt.Chart(new_data)
    # Add an 'index' field (a running count()); the line layer uses it as its
    # detail channel.
    .transform_window(index="count()")
    # Wide -> long: the listed columns are folded into 'key' / 'value' fields.
    .transform_fold(
        [
            "alcohol",
            "malic_acid",
            "ash",
            "alcalinity_of_ash",
            "total_phenols",
            "flavanoids",
            "nonflavanoid_phenols",
            "proanthocyanins",
            "color_intensity",
            "hue",
            "od280/od315_of_diluted_wines",
        ]
    )
    # Attach each key's minimum and maximum value to every row of that key.
    .transform_joinaggregate(
        min="min(value)",
        max="max(value)",
        groupby=["key"],
    )
    # norm_val: min-max rescale of value within its key, so every column spans
    # the same vertical range. mid: midpoint of the key's range, used for the
    # middle tick label.
    .transform_calculate(
        norm_val="(datum.value - datum.min) / (datum.max - datum.min)",
        mid="(datum.min + datum.max) / 2",
    )
    .properties(width=1200, height=300)
)
# Line layer: folded column name on x, rescaled value on y (the shared y axis
# is hidden because per-column tick labels are drawn separately), coloured by
# 'target', with 'index' as the detail channel.
lines = base.mark_line(opacity=0.3).encode(
    x="key:N",
    y=alt.Y("norm_val:Q", axis=None),
    color="target:N",
    detail="index:N",
)
# Rule layer: light grey rule marks positioned by 'key' on x, with tooltips
# switched off.
rules = (
    base.mark_rule(color="#ccc", tooltip=None)
    .encode(x="key:N", detail="count():Q")
)
def ytick(yvalue, field):
    """Build one row of tick marks with text labels, one per folded column.

    Args:
        yvalue: Constant passed to ``alt.value`` for the y channel. The calls
            in this snippet use 0, 150 and 300 for a chart of height 300.
        field: Name of the field whose ``min(...)`` aggregate is shown as the
            label text (called with "max", "mid" and "min" in this snippet).

    Returns:
        An ``alt.layer`` made of a text mark and a tick mark that share the
        same encoding.
    """
    # Both marks reuse one encoding so the label sits next to its tick.
    scale = base.encode(x='key:N', y=alt.value(yvalue), text=f"min({field}):Q")
    return alt.layer(
        scale.mark_text(baseline="middle", align="right", dx=-5, tooltip=None),
        scale.mark_tick(size=8, color="#ccc", orient="horizontal", tooltip=None)
    )
# Final chart: the lines, the rules and three rows of tick labels. The y
# values 0 / 150 / 300 are paired with the "max" / "mid" / "min" fields and
# correspond to the chart height of 300 set on `base`.
alt.layer(
    lines,
    rules,
    ytick(0, "max"),
    ytick(150, "mid"),
    ytick(300, "min"),
).configure_axisX(
    domain=False,
    labelAngle=0,
    tickColor="#ccc",
    title=None,
).configure_view(
    stroke=None,  # no border around the chart view
)
You could create a simpler parallel coordinates plot like this if you are OK with not having a separate y-axis for each column in the data frame:
from sklearn import datasets
import altair as alt
data = datasets.load_wine(as_frame=True).frame
# Columns that become the axes of the parallel coordinates plot.
num_cols = [
    "alcohol",
    "malic_acid",
    "ash",
    "alcalinity_of_ash",
    "total_phenols",
    "flavanoids",
    "nonflavanoid_phenols",
    "proanthocyanins",
    "color_intensity",
    "hue",
    "od280/od315_of_diluted_wines",
]
# Min-max rescale each column so they all share one y axis. This step is
# optional, but without it columns with smaller absolute values would be
# compressed into a small part of the y-axis range.
data[num_cols] = data[num_cols].apply(
    lambda col: (col - col.min()) / (col.max() - col.min())
)
# Single-layer variant: one shared y axis instead of per-column tick labels.
(
    alt.Chart(data)
    # Add an 'index' field (a running count()), used below as the detail channel.
    .transform_window(index="count()")
    # Wide -> long: num_cols are folded into 'key' / 'value' fields.
    .transform_fold(num_cols)
    .mark_line()
    .encode(
        x=alt.X("key:O", title=None, scale=alt.Scale(nice=False, padding=0.05)),
        y=alt.Y("value:Q", title=None),
        color=alt.Color("target:N", title=None),
        detail="index:N",
    )
    .properties(width=1200)
)
If you are using this for exploratory data analysis and don't need to customize the plot a lot, then you can also try out my experimental package [altair_ally][3] for quickly creating some common exploratory plots:
from sklearn import datasets
import altair_ally as aly
data_wine = datasets.load_wine(as_frame=True).frame
# Convert the class label to strings before using it as the colour column
# (presumably so it is treated as a category rather than a number -- confirm
# against the altair_ally documentation).
data_wine["target"] = data_wine["target"].astype(str)
aly.parcoord(data_wine, color="target")