I am encountering issues computing s-weights using extended binned models with compute_sweights()
.
The method works fine for unbinned pdfs, but when using binned pdfs, it gives a ModelNotFittedToData
error despite the binned model fitting quite well...
An example:
import numpy as np
import os
os.environ["ZFIT_DISABLE_TF_WARNINGS"] = "1"
import zfit
from hepstats.splot import compute_sweights
class FirstOrderPoly(zfit.pdf.ZPDF):
"""First order polynomial `a * x + b`"""
_PARAMS = ['a']
@zfit.supports(norm=False)
def _pdf(self, x, norm, params):
del norm
data = x[0]
a = params['a']
return 1 + a * data
def cdf_poly(limit, a):
return limit + 0.5 * a * limit ** 2
def integral_func(limits, norm, params, model):
del norm, model
a = params['a']
lower, upper = limits.v1.limits
integral = cdf_poly(upper, a) - cdf_poly(lower, a)
return integral
integral_limits = zfit.Space(axes=(0,), limits=(zfit.Space.ANY, zfit.Space.ANY))
FirstOrderPoly.register_analytic_integral(func=integral_func, limits=integral_limits)
binned = True
nbkg = 20000
nsig = 45000
bounds = (1920, 2050)
np.random.seed(0)
bkg = np.random.uniform(1920, 2050, nbkg)
peak = np.random.normal(1969, 6.75, nsig)
mass = np.concatenate((bkg,peak))
sel = (mass > bounds[0]) & (mass < bounds[1])
mass = mass[sel]
sorter = np.argsort(mass)
mass = mass[sorter]
obs = zfit.Space("x", limits=bounds)
mean = zfit.Parameter("mean", 1969, 1920, 2050)
sigma = zfit.Parameter("sigma", 6.75, 0.02, 10)
bgA = zfit.Parameter("bgA", 0, -1.0, 1.0)
Nsig = zfit.Parameter("Nsig", nsig, 0.0, len(mass))
Nbkg = zfit.Parameter("Nbkg", nbkg, 0.0, len(mass))
if binned:
data = zfit.Data(data=mass, obs=obs).to_binned(100)
signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig).to_binned(data.space)
bg = FirstOrderPoly(obs=obs, a=bgA).create_extended(Nbkg).to_binned(data.space)
tot_model = zfit.pdf.BinnedSumPDF(pdfs=[signal, bg])
nll = zfit.loss.ExtendedBinnedNLL(model=tot_model, data=data)
else:
data = zfit.Data(data=mass, obs=obs)
signal = zfit.pdf.Gauss(obs=obs, mu=mean, sigma=sigma).create_extended(Nsig)
bg = FirstOrderPoly(obs=obs, a=bgA).create_extended(Nbkg)
tot_model = zfit.pdf.SumPDF(pdfs=[signal, bg])
nll = zfit.loss.ExtendedUnbinnedNLL(model=tot_model, data=data)
minimizer = zfit.minimize.Minuit()
result = minimizer.minimize(loss=nll)
result.hesse()
sweights = compute_sweights(tot_model, data)[Nsig]
I notice that this error is thrown in the source for compute_sweights()
dependent on the output of eval_pdf()
, which in the documentation seems to sample from the pdf as though it were unbinned - is this a bug, or is there some other way I should be defining my extended unbinned composite models to get around this?
This could very well be an implementation issue. sweights used to support only unbinned fits, as for a long time unbinned fits where the only possibility in zfit. It could very well be possible that, as binned fits became available, simply nobody thought of this option.
I would therefore suggest to head over to hepstats and report it there or better, suggest a fix. To try out what works, simply change it locally (it's probably an easy fix?) and suggest it in an issue, most likely a PR will gladly be accepted.