I am looking to sample from the multivariate log normal distribution by using the class multivar_normal_gen
in the scipy
module. My strategy was to simply re-define the log multivariate normal pdf using the _logpdf
method and then pass that into _dist
in multivariate_normal_frozen
class.
The form of the pdf is defined in this StackExchange post.
from typing import Dict, Union, List, Optional
from pathlib import Path
import pandas as pd
import numpy as np
from scipy.stats import Covariance
from scipy.stats._multivariate import multivariate_normal_frozen, multivariate_normal_gen
from matplotlib import pyplot as plt
_LOG_2PI = np.log(2 * np.pi)
class MultivariateNormalLogGen(multivariate_normal_gen):
def _logpdf(self, x: np.array, mean: np.array, cov_object: Covariance) -> np.array:
log_det_cov, rank = cov_object.log_pdet, cov_object.rank
dev = np.log(x) - mean
if dev.ndim > 1:
log_det_cov = log_det_cov[..., np.newaxis]
rank = rank[..., np.newaxis]
maha = np.sum(np.square(cov_object.whiten(dev)) + 2 * np.log(x), axis=-1)
return -0.5 * (rank * _LOG_2PI + log_det_cov + maha)
class MultivariateNormalLogFrozen(multivariate_normal_frozen):
_dist = MultivariateNormalLogGen()
However, this does not result in the desired distribution as the limits are not between 0 and positive infinity. Below it's an example that uses the class methods and plots a supposedly bivariate log normal distribution.
class BivariateNormal(MultivariateNormalLogFrozen):
def __init__(self,
parameter_names: List[str],
mu: np.array,
cov: Union[np.array, Covariance]):
super(BivariateLogNormal, self).__init__(mean=mu, cov=cov)
self.parameter_names = parameter_names
self.mu_data = mu
self.cov_data = cov
def draw_sample(self, n: int, seed: Optional[int] = None) -> Dict[str, np.array]:
"""Draws samples from the distribution."""
if seed:
np.random.seed(seed)
sample = self.rvs(n) # np.exp(self.rvs(n))
result = {}
for j, par in enumerate(self.parameter_names):
result[par] = sample[:, j]
return result
def plot_distribution(
dsn: BivariateLogNormal,
sample: Dict[str, np.array]
) -> None:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(6, 6))
color = "tab:blue"
cmap = "Blues"
df = pd.DataFrame.from_dict(sample)
ax.plot(
df['par_1'], df['par_2'],
'*',
color=color,
markeredgecolor='k',
markersize=10,
)
# plot pdf
xlims = ax.get_xlim()
ylims = ax.get_ylim()
xvec = np.linspace(start=xlims[0], stop=xlims[1], num=100)
yvec = np.linspace(start=ylims[0], stop=ylims[1], num=100)
x, y = np.meshgrid(xvec, yvec)
xy = np.dstack((x, y))
z = dsn.pdf(xy) # FIXME: Change to lognormal pdf
ax.contour(x, y, z, cmap=cmap, levels=100)
ax.set_xlabel('parameter_1')
ax.set_ylabel('parameter_2')
if __name__ == "__main__":
seed = 124
n_samples = 50
# sampling from distribution
parameter_names = ['par_1', 'par_2']
mu = np.log(np.array([0.5, 0.5]))
cov = np.array([
[1.0, 0.0],
[0.0, 1.0]
])
dsn = BivariateLogNormal(mu=mu, cov=cov, parameter_names=parameter_names)
sample = dsn.draw_sample(n_samples, seed=seed)
# plot distributions
plot_distribution(
dsn=dsn,
sample=sample,
)
How can I achieve this properly? Should I define other methods like _pdf
instead? Any help would be appreciated. Thank you!
After Matt's answer, here is my workaround on the pdf and the sampling from a Multivariate Log Normal. Not super smart, but it works.
from typing import Dict, List, Optional
import pandas as pd
import numpy as np
from scipy.stats import Covariance, multivariate_normal
_LOG_2PI = np.log(2 * np.pi)
class BivariateLogNormal:
def __init__(self,
parameter_names: List[str],
mean: np.array,
cov: Covariance) -> None:
self.parameter_names = parameter_names
self.mean = mean
self.cov = cov
def rvs(self,
size: int,
seed: Optional[int]) -> Dict[str, np.array]:
if seed:
np.random.seed(seed)
multi_norm = multivariate_normal(self.mean, self.cov)
sample = np.exp(multi_norm.rvs(size=size))
result = {}
for j, par in enumerate(self.parameter_names):
result[par] = sample[:, j]
return result
def logpdf(self,
x: np.array) -> np.array:
log_det_cov, rank = self.cov.log_pdet, self.cov.rank
dev = np.log(x) - self.mean
if dev.ndim > 1:
log_det_cov = log_det_cov[..., np.newaxis]
rank = rank[..., np.newaxis]
maha = np.sum(np.square(self.cov.whiten(dev)) + 2 * np.log(x), axis=-1)
return -0.5 * (rank * _LOG_2PI + log_det_cov + maha)
def pdf(self,
x: np.array) -> np.array:
return np.exp(self.logpdf(x))