python graph plotly plotly-dash portfolio

Transfer a custom plot function to Plotly

I am using the package riskfolio-lib. It allows you to run portfolio optimizations and plot all sorts of statistics for a portfolio of assets. It also provides customized charts that I want to use in my Plotly Dash dashboard.

The following code produces a network chart. How can I get all the necessary data from ax (or anywhere else) to reproduce the chart using Plotly, similar to what is shown here or here? get_xdata() and get_ydata() don't seem to work.

import yfinance as yf
import riskfolio as rp

# Date range
start = '2016-01-01'
end = '2019-12-30'

# Tickers of assets
tickers = ['JCI', 'TGT', 'CMCSA', 'CPB', 'MO', 'APA', 'MMC', 'JPM',
           'ZION', 'PSA', 'BAX', 'BMY', 'LUV', 'PCAR', 'TXT', 'TMO',
           'DE', 'MSFT', 'HPQ', 'SEE', 'VZ', 'CNP', 'NI', 'T', 'BA']
# Sorted in place; the column relabelling below presumably relies on the
# download returning its columns in this same alphabetical order.
tickers.sort()

# Downloading the data
# auto_adjust=False is pinned explicitly: recent yfinance releases default it
# to True and then return no 'Adj Close' column, which breaks the .loc below.
data = yf.download(tickers, start = start, end = end, auto_adjust = False)
# Keep only the 'Adj Close' level of the column MultiIndex
data = data.loc[:,('Adj Close', slice(None))]
data.columns = tickers
# Period-over-period returns; rows containing NaN (e.g. the first) are dropped
assets = data.pct_change().dropna()

# Network chart drawn by riskfolio; the returned object is kept as `ax`
ax = rp.plot_network(returns=assets, codependence="pearson",
                     linkage="ward", k=None, max_k=10,
                     alpha_tail=0.05, leaf_order=True,
                     kind='spring', ax=None)

Solution

If you look at the riskfolio-lib code on GitHub, you can find the implementation of plot_network(). With only a little refactoring you can remove the matplotlib code and return pos (the positions of the nodes) and G (the networkx graph).
With these two objects you have everything needed by the two links you referenced. I've coded a function that generates a Plotly figure from these two inputs.
It builds pandas data frames from these inputs so that Plotly Express can then be used to generate the figure.
Take a look - it may be better for this to be contributed back to the project on GitHub as an issue/PR. However, I see that there are none.

solution

import yfinance as yf
import riskfolio as rp

# NOTE: plot_network() and plotly_network() are defined in the "code" section
# further down; those definitions must have been executed before this script.

# Date range
start = "2016-01-01"
end = "2019-12-30"
tickers = ['JCI', 'TGT', 'CMCSA', 'CPB', 'MO', 'APA', 'MMC', 'JPM',
           'ZION', 'PSA', 'BAX', 'BMY', 'LUV', 'PCAR', 'TXT', 'TMO',
           'DE', 'MSFT', 'HPQ', 'SEE', 'VZ', 'CNP', 'NI', 'T', 'BA']  # fmt: skip
# Sorted in place; the column relabelling below presumably relies on the
# download returning its columns in this same alphabetical order.
tickers.sort()

# Downloading the data
# auto_adjust=False is pinned explicitly: recent yfinance releases default it
# to True and then return no "Adj Close" column, which breaks the .loc below.
data = yf.download(tickers, start=start, end=end, auto_adjust=False)
data = data.loc[:, ("Adj Close", slice(None))]
data.columns = tickers
assets = data.pct_change().dropna()

# matplotlib
ax = rp.plot_network(
    returns=assets,
    codependence="pearson",
    linkage="ward",
    k=None,
    max_k=10,
    alpha_tail=0.05,
    leaf_order=True,
    kind="spring",
    ax=None,
)

# plotly
# plot_network() returns (pos, G), which is unpacked straight into
# plotly_network(). The figure is kept and shown explicitly so the chart also
# appears when this runs as a plain script rather than as the last expression
# of a notebook cell.
fig = plotly_network(
    *plot_network(
        returns=assets,
        codependence="pearson",
        linkage="ward",
        k=None,
        max_k=10,
        alpha_tail=0.05,
        leaf_order=True,
        kind="spring",
        ax=None,
    )
)
fig.show()

code

import pandas as pd
import numpy as np
from scipy.spatial.distance import squareform
import scipy.cluster.hierarchy as hr
import networkx as nx
import riskfolio.AuxFunctions as af
import plotly.express as px


def plot_network(
    returns,
    custom_cov=None,
    codependence="pearson",
    linkage="ward",
    k=None,
    max_k=10,
    bins_info="KN",
    alpha_tail=0.05,
    gs_threshold=0.5,
    leaf_order=True,
    kind="spring",
    seed=0,
    node_labels=True,
    node_size=1400,
    node_alpha=0.7,
    font_size=10,
    title="",
    height=8,
    width=10,
    ax=None,
):
    """Build the asset network and a node layout without drawing anything.

    A codependence matrix and a distance matrix are derived from ``returns``,
    a graph is built from them, and node coordinates are computed with the
    requested networkx layout.

    Parameters
    ----------
    returns : pd.DataFrame
        One column per asset; the column names become the node labels.
    custom_cov : DataFrame-like, optional
        Covariance matrix, required when ``codependence="custom_cov"``.
    codependence : str
        One of "pearson", "spearman", "kendall", "gerber1", "gerber2",
        "abs_pearson", "abs_spearman", "abs_kendall", "distance",
        "mutual_info", "tail" or "custom_cov".
    linkage : str
        "DBHT" builds the graph from the DBHT adjacency matrix; any other
        value is passed to ``scipy.cluster.hierarchy.linkage`` and the graph
        is the minimum spanning tree of the codependence matrix.
    k : int, optional
        Number of clusters; when None it is estimated with
        ``af.two_diff_gap_stat`` using ``max_k``.
    max_k : int
        Upper bound handed to ``af.two_diff_gap_stat``.
    bins_info : str
        Forwarded to the mutual-information helpers.
    alpha_tail : float
        Forwarded to ``af.ltdi_matrix`` for ``codependence="tail"``.
    gs_threshold : float
        Threshold forwarded to the Gerber statistic helpers.
    leaf_order : bool
        Forwarded to the clustering routine as the leaf-ordering switch.
    kind : str
        Layout: "spring", "planar", "circular" or "kamada".
    seed : int
        Seed for the spring layout.
    node_labels, node_size, node_alpha, font_size, title, height, width, ax
        Not used by this function; kept so the signature stays the same.

    Returns
    -------
    tuple
        ``(pos, G)``: the layout returned by networkx for ``G``, and the
        networkx graph itself.

    Raises
    ------
    ValueError
        If ``returns`` is not a DataFrame, ``codependence`` or ``kind`` is not
        one of the supported values, or ``custom_cov`` is missing when
        ``codependence="custom_cov"``.
    NameError
        If ``kind="kamada"`` is combined with "pearson"/"spearman"
        codependence and a linkage other than "DBHT".
    """
    if not isinstance(returns, pd.DataFrame):
        raise ValueError("returns must be a DataFrame")

    # Validate the option strings before any heavy computation; previously an
    # unknown value only surfaced later as an unbound local variable.
    corr_methods = {"pearson", "spearman", "kendall"}
    abs_corr_methods = {"abs_pearson", "abs_spearman", "abs_kendall"}
    gerber_methods = {"gerber1", "gerber2"}
    other_methods = {"distance", "mutual_info", "tail", "custom_cov"}
    supported = corr_methods | abs_corr_methods | gerber_methods | other_methods
    if codependence not in supported:
        raise ValueError(
            f"codependence must be one of {sorted(supported)}, got {codependence!r}"
        )
    if codependence == "custom_cov" and custom_cov is None:
        raise ValueError("custom_cov is required when codependence is 'custom_cov'")

    layouts = {"spring", "planar", "circular", "kamada"}
    if kind not in layouts:
        raise ValueError(f"kind must be one of {sorted(layouts)}, got {kind!r}")
    if (
        kind == "kamada"
        and codependence in {"pearson", "spearman"}
        and linkage != "DBHT"
    ):
        # Exception type and message are kept as they were for existing callers.
        raise NameError(
            "kamada layout only works with positive codependence measures except when linkage is DBHT."
        )

    labels = np.array(returns.columns.tolist())

    # Calculating codependence matrix and distance metric
    if codependence in corr_methods:
        codep = returns.corr(method=codependence)
        dist = np.sqrt(np.clip((1 - codep) / 2, a_min=0.0, a_max=1.0))
    elif codependence in gerber_methods:
        # Imported here because the module-level imports do not provide `gs`.
        import riskfolio.GerberStatistic as gs

        gerber_cov = (
            gs.gerber_cov_stat1 if codependence == "gerber1" else gs.gerber_cov_stat2
        )
        codep = gerber_cov(returns, threshold=gs_threshold)
        codep = af.cov2corr(codep)
        dist = np.sqrt(np.clip((1 - codep) / 2, a_min=0.0, a_max=1.0))
    elif codependence in abs_corr_methods:
        # Strip the "abs_" prefix to get the pandas correlation method name.
        codep = np.abs(returns.corr(method=codependence[4:]))
        dist = np.sqrt(np.clip((1 - codep), a_min=0.0, a_max=1.0))
    elif codependence == "distance":
        codep = af.dcorr_matrix(returns).astype(float)
        dist = np.sqrt(np.clip((1 - codep), a_min=0.0, a_max=1.0))
    elif codependence == "mutual_info":
        codep = af.mutual_info_matrix(returns, bins_info).astype(float)
        dist = af.var_info_matrix(returns, bins_info).astype(float)
    elif codependence == "tail":
        codep = af.ltdi_matrix(returns, alpha_tail).astype(float)
        dist = -np.log(codep)
    else:  # "custom_cov"
        codep = af.cov2corr(custom_cov).astype(float)
        dist = np.sqrt(np.clip((1 - codep) / 2, a_min=0.0, a_max=1.0))

    # Hierarchical clustering
    dist = dist.to_numpy()
    dist = pd.DataFrame(dist, columns=codep.columns, index=codep.index)
    if linkage == "DBHT":
        # Imported here because the module-level imports do not provide `db`.
        import riskfolio.DBHT as db

        # different choices for D, S give different outputs!
        D = dist.to_numpy()  # dissimilarity matrix
        if codependence in {"pearson", "spearman", "custom_cov"}:
            S = (1 - dist**2).to_numpy()
        else:
            S = codep.copy().to_numpy()  # similarity matrix
        (_, Rpm, _, _, _, clustering) = db.DBHTs(
            D, S, leaf_order=leaf_order
        )  # DBHT clustering
        MAdj = pd.DataFrame(Rpm, index=labels, columns=labels)
        G = nx.from_pandas_adjacency(MAdj)
    else:
        p_dist = squareform(dist, checks=False)
        clustering = hr.linkage(p_dist, method=linkage, optimal_ordering=leaf_order)
        # Graph over the codependence matrix, reduced to its spanning tree
        T = nx.from_pandas_adjacency(codep)
        G = nx.minimum_spanning_tree(T)

    # NOTE: the cluster assignment below is not part of the return value. It
    # is still computed so that an invalid `linkage` or `k` fails here exactly
    # as it did before.
    if k is None:
        k = af.two_diff_gap_stat(codep, dist, clustering, max_k)

    clustering_inds = hr.fcluster(clustering, k, criterion="maxclust")
    clusters = {i: [] for i in range(min(clustering_inds), max(clustering_inds) + 1)}
    for i, v in enumerate(clustering_inds):
        clusters[v].append(labels[i])

    # Node coordinates for the requested layout (`kind` was validated above)
    if kind == "spring":
        pos = nx.spring_layout(G, seed=seed)
    elif kind == "planar":
        pos = nx.planar_layout(G)
    elif kind == "circular":
        pos = nx.circular_layout(G)
    else:  # "kamada"
        pos = nx.kamada_kawai_layout(G)

    return pos, G


def plotly_network(pos, G):
    """Render a networkx graph as a plotly figure.

    Edges are drawn as one grey line trace and nodes as a scatter trace that
    is labelled with the node name and coloured by the node's number of
    neighbours.

    Parameters
    ----------
    pos : mapping
        Node -> (x, y) coordinates, e.g. the result of a networkx layout
        function. Every node of ``G`` must be present.
    G : networkx graph
        Graph whose nodes and edges are drawn.

    Returns
    -------
    plotly figure
        The figure created by ``px.line`` with the node trace added.
    """
    # Node table: name, neighbour count, neighbour list and coordinates. Built
    # from the graph itself so that nodes without edges are kept as well.
    nodes = list(G.nodes())
    neighbours = {node: list(G.neighbors(node)) for node in nodes}
    df_n = pd.DataFrame(
        {
            "index": nodes,
            "linked": [len(neighbours[node]) for node in nodes],
            "info": [neighbours[node] for node in nodes],
            "x": [pos[node][0] for node in nodes],
            "y": [pos[node][1] for node in nodes],
        }
    )

    # Stepped colour scale: each palette colour covers one band between
    # consecutive stops of linspace(0, 1, max_linked).
    palette = px.colors.qualitative.Set3
    max_linked = int(df_n["linked"].max()) if len(df_n) else 0
    if max_linked < 2:
        # Fewer than two stops cannot form a scale that spans 0..1; fall back
        # to a single flat colour.
        cmap = [(0.0, palette[0]), (1.0, palette[0])]
    else:
        stops = np.repeat(np.linspace(0, 1, max_linked), 2)[1:]
        # Cycle through the palette so the scale still ends at 1.0 when there
        # are more bands than palette colours.
        cmap = [
            (float(stop), palette[(i // 2) % len(palette)])
            for i, stop in enumerate(stops)
        ]

    # Edge coordinates: x/y of both endpoints followed by None, which ends up
    # as NaN and separates one edge segment from the next within the single
    # line trace.
    edge_x, edge_y = [], []
    for source, target in G.edges():
        edge_x += [pos[source][0], pos[target][0], None]
        edge_y += [pos[source][1], pos[target][1], None]
    df_e = pd.DataFrame({"x": edge_x, "y": edge_y}, dtype=float)

    fig = px.line(df_e, x="x", y="y", color_discrete_sequence=["#888"])

    fig = fig.add_traces(
        px.scatter(
            df_n, x="x", y="y", color="linked", text="index", hover_data=["info"]
        )
        .update_traces(marker_size=37)
        .data
    )

    # Both axes are purely decorative: no grid, zero line, ticks or title.
    bare_axis = dict(
        showgrid=False,
        zeroline=False,
        showticklabels=False,
        mirror=True,
        title={"text": ""},
    )
    fig = fig.update_layout(
        xaxis=dict(bare_axis),
        yaxis=dict(bare_axis),
        coloraxis={"colorscale": cmap},
        # The original dict listed "b" twice; the second key was evidently
        # meant to be the right margin.
        margin={"t": 20, "b": 0, "l": 0, "r": 0},
        height=500,
        width=650,
    )

    return fig