pythonnumpymle

RuntimeWarning: divide by zero encountered in log in Max likelihood estimate python code


i'm simulating the effect of log maximum likelihood estimate (for Bionomial distribution) as number of samples increases. I choose a true parameter value of 0.6 for bionomial distribution from which the responses are coming from.

but i'm getting the error even when i remove 0 from the possible parameters i'm using for my analysis.

  1. fsd
fods23/simulations/scripts/Binomial_MLE_simulations.py:19: RuntimeWarning: divide by zero encountered in log
  cal_llh = np.log(theta**(x) * (1-theta)**(1-x))
############################################################
## Step 1#
############################################################
# function to calculate likelihood for bernoulli
def logLikelihood(theta, x):
    # cal the log likelihood of each observation in the samples collected
    cal_llh = np.log(theta**(x) * (1-theta)**(1-x))
    tlld = np.prod(cal_llh)# cal the total likleihood
    return tlld

# function to calculate
def mle_Binom(X_samples, thetas):
    loglikelihood_single_theta = [logLikelihood(theta=t, x=X_samples) for t in thetas]
    # mle_val=thetas[np.argmax(likelihood_single_theta)] #get the maximum likelihood estimate
    return np.array(loglikelihood_single_theta)

# test the functions
true_params_Bern = 0.6


############################################################
## Step 2#
############################################################
# how does the likelihood plot changes as sample size changes 
Bern_Nsamples = np.linspace(start=100, stop=1000, num=100, dtype=int)
response_Bernoulli = np.random.binomial(n=1, p=0.6, size=100)
possible_thetas = np.linspace(start=0.001, stop=1, num=100)
result_theta = np.ma.array(possible_thetas.copy())

Bern_Nsamples = np.linspace(start=100, stop=1000, num=100, dtype=int)
beta_for_mle_holder = []

def Bernoulli_optim_nSamples(Bern_stepSize, rand_sets_thetas): 
    for n in Bern_stepSize:
        response_Bernoulli = np.random.binomial(n=1, p=0.6, size=n)
        mle_out_Binom = mle_Binom(X_samples=response_Bernoulli, thetas=rand_sets_thetas) #cal lld of specific theta
        max_theta_Binom = rand_sets_thetas[np.argmax(mle_out_Binom)] #which theta gave us max lld
        beta_for_mle_holder.append(max_theta_Binom)
    fig, ax = plt.subplots()
    ax.plot(Bern_stepSize, beta_for_mle_holder)
    ax.set_title('Bernoulli dist nSamples vrs MLE')
    ax.hlines(y=0.6, xmin=min(Bern_stepSize), xmax=max(Bern_stepSize), linestyles="dashed", color="red", label="MLE")
    plt.xlabel("nSamples")
    plt.ylabel("MLE")
    plt.show()


Bernoulli_optim_nSamples(Bern_stepSize=Bern_Nsamples, rand_sets_thetas=result_theta)

Solution

  • If I correctly understood your problem, you want to perform MLE to solve the probability p for Binomial B(k,n=1,p) using MLE and display the Likelihood curve to contextualize the optimum found.

    Lets create a MCVE to do so:

    import numpy as np
    import matplotlib.pyplot as plt
    from scipy import stats, optimize
    

    The MLE problem is formulated as follow (we use negative likelihood as we will minimize):

    def likelihood(beta, data):
        return - stats.binom.logpmf(k=data, n=1, p=beta[0]).sum()
    

    We create a law with your parameters:

    X = stats.binom(n=1, p=0.6)
    

    And sample from it:

    np.random.seed(12345)
    data = X.rvs(500)
    

    Now solving the MLE problem is as simple as:

    result = optimize.minimize(
        likelihood, x0=[0.5], args=(data,),
        bounds=[(0., 1.)], method='Powell'
    )
    #    direc: array([[1.]])
    #      fun: 341.3715241024539
    #  message: 'Optimization terminated successfully.'
    #     nfev: 20
    #      nit: 2
    #   status: 0
    #  success: True
    #        x: array([0.57200071])
    

    Now we vectorize the objective function:

    @np.vectorize
    def L(beta):
        return likelihood([beta], data)
    

    And draw its value for several possible parameters:

    t = np.linspace(0., 1., 100)
    
    fig, axe = plt.subplots()
    axe.plot(t, L(t))
    axe.scatter(result.x, L(result.x))
    axe.grid()
    

    Graphically we have:

    enter image description here