pythonscipyscipy.stats

How can I define the pdf of a Multivariate Log Normal Distribution using scipy classes?


I am looking to sample from the multivariate log normal distribution by using the class multivar_normal_gen in the scipy module. My strategy was to simply re-define the log multivariate normal pdf using the _logpdf method and then pass that into _dist in multivariate_normal_frozen class.

The form of the pdf is defined in this StackExchange post.

from typing import Dict, Union, List, Optional
from pathlib import Path
import pandas as pd
import numpy as np
from scipy.stats import Covariance
from scipy.stats._multivariate import multivariate_normal_frozen, multivariate_normal_gen
from matplotlib import pyplot as plt

_LOG_2PI = np.log(2 * np.pi)


class MultivariateNormalLogGen(multivariate_normal_gen):
    def _logpdf(self, x: np.array, mean: np.array, cov_object: Covariance) -> np.array:
        log_det_cov, rank = cov_object.log_pdet, cov_object.rank
        dev = np.log(x) - mean
        if dev.ndim > 1:
            log_det_cov = log_det_cov[..., np.newaxis]
            rank = rank[..., np.newaxis]
        maha = np.sum(np.square(cov_object.whiten(dev)) + 2 * np.log(x), axis=-1)

        return -0.5 * (rank * _LOG_2PI + log_det_cov + maha)


class MultivariateNormalLogFrozen(multivariate_normal_frozen):
    _dist = MultivariateNormalLogGen()

However, this does not result in the desired distribution as the limits are not between 0 and positive infinity. Below it's an example that uses the class methods and plots a supposedly bivariate log normal distribution.

class BivariateNormal(MultivariateNormalLogFrozen):

    def __init__(self,
                 parameter_names: List[str],
                 mu: np.array,
                 cov: Union[np.array, Covariance]):
        super(BivariateLogNormal, self).__init__(mean=mu, cov=cov)
        self.parameter_names = parameter_names
        self.mu_data = mu
        self.cov_data = cov

    def draw_sample(self, n: int, seed: Optional[int] = None) -> Dict[str, np.array]:
        """Draws samples from the distribution."""
        if seed:
            np.random.seed(seed)
        sample = self.rvs(n)  # np.exp(self.rvs(n))
        result = {}
        for j, par in enumerate(self.parameter_names):
            result[par] = sample[:, j]
        return result


def plot_distribution(
    dsn: BivariateLogNormal,
    sample: Dict[str, np.array]
) -> None:
    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 6))
    color = "tab:blue"
    cmap = "Blues"
    df = pd.DataFrame.from_dict(sample)
    ax.plot(
        df['par_1'], df['par_2'],
        '*',
        color=color,
        markeredgecolor='k',
        markersize=10,
    )

    # plot pdf
    xlims = ax.get_xlim()
    ylims = ax.get_ylim()
    xvec = np.linspace(start=xlims[0], stop=xlims[1], num=100)
    yvec = np.linspace(start=ylims[0], stop=ylims[1], num=100)
    x, y = np.meshgrid(xvec, yvec)
    xy = np.dstack((x, y))
    z = dsn.pdf(xy)  # FIXME: Change to lognormal pdf
    ax.contour(x, y, z, cmap=cmap, levels=100)
    ax.set_xlabel('parameter_1')
    ax.set_ylabel('parameter_2')


if __name__ == "__main__":
    seed = 124
    n_samples = 50

    # sampling from distribution
    parameter_names = ['par_1', 'par_2']

    mu = np.log(np.array([0.5, 0.5]))
    cov = np.array([
        [1.0, 0.0],
        [0.0, 1.0]
    ])
    dsn = BivariateLogNormal(mu=mu, cov=cov, parameter_names=parameter_names)
    sample = dsn.draw_sample(n_samples, seed=seed)

    # plot distributions
    plot_distribution(
        dsn=dsn,
        sample=sample,
    )

bivariate_normal_dsn

How can I achieve this properly? Should I define other methods like _pdf instead? Any help would be appreciated. Thank you!


Solution

  • After Matt's answer, here is my workaround on the pdf and the sampling from a Multivariate Log Normal. Not super smart, but it works.

    from typing import Dict, List, Optional
    import pandas as pd
    import numpy as np
    from scipy.stats import Covariance, multivariate_normal
    
    _LOG_2PI = np.log(2 * np.pi)
    
    class BivariateLogNormal:
    
        def __init__(self, 
                     parameter_names: List[str], 
                     mean: np.array, 
                     cov: Covariance) -> None:
            self.parameter_names = parameter_names
            self.mean = mean
            self.cov = cov
    
        def rvs(self, 
                size: int, 
                seed: Optional[int]) -> Dict[str, np.array]:
    
            if seed:
                np.random.seed(seed)
    
            multi_norm = multivariate_normal(self.mean, self.cov)
            sample = np.exp(multi_norm.rvs(size=size))
    
            result = {}
            for j, par in enumerate(self.parameter_names):
                result[par] = sample[:, j]
    
            return result
    
        def logpdf(self,  
                   x: np.array) -> np.array:
            log_det_cov, rank = self.cov.log_pdet, self.cov.rank
            dev = np.log(x) - self.mean
            if dev.ndim > 1:
                log_det_cov = log_det_cov[..., np.newaxis]
                rank = rank[..., np.newaxis]
            maha = np.sum(np.square(self.cov.whiten(dev)) + 2 * np.log(x), axis=-1)
    
            return -0.5 * (rank * _LOG_2PI + log_det_cov + maha)
    
        def pdf(self, 
                x: np.array) -> np.array:
            return np.exp(self.logpdf(x))