I need to use the function erfz (error function that takes complex arguments) for fitting Voigt profiles. It seems like it is only implemented in R in the pracma package as erfz(z).
I tried to evaluate erfz(0), which throws an error, when it should output 0.
My R is in french, so here's the error: "Erreur dans if (sum(work.i) == 0) break : valeur manquante là où TRUE / FALSE est requis".
I created a function to replace erfz(x), that works with x being any vectors potentially containing 0. Here it is:
erfzfix <- function(x){
if (0%in%x){
ind=seq(1,length(x))
x0<-x==0
x0n<-ind[x0]
xstar<-x[!x0]
outputstar<-erfz(xstar)
output<-append(outputstar,0,after = x0n-1)
}
else output=erfz(x)
return(output)
}
I'm not very experienced with R but I guess this will considerably slow the evaluation of the function. Is there a better way to fix it?
EDIT: Other numerical issues are present in the evaluation of erfz, that make fitting procedures struggle a lot because (I guess) of the non-C1 character of the function. This plot shows a "noisy" part of erfz:

There was a package (NORMT3) with a very good implementation of the Faddeeva function (wofz), that has been removed from CRAN. You can install a previous version from archive:
install.packages("https://cran.r-project.org/src/contrib/Archive/NORMT3/NORMT3_1.0.4.tar.gz", repos = NULL, type = "source")
NORMT3::wofz is faster and generally more stable than the equivalent version using pracma::erfz. Demonstrating:
library(NORMT3)
library(pracma)
n <- 1e6
z <- runif(n, -1, 1) + runif(n, -1, 1)*1i
system.time(x <- wofz(z))
#> user system elapsed
#> 0.52 0.02 0.56
system.time(y <- (1 - erfz(-z*1i))*exp(-z^2))
#> user system elapsed
#> 3.01 0.98 4.14
all.equal(x, y)
#> [1] TRUE
erfz has difficulty with some values:
options(digits = 17)
z <- -0.781092052676692+9.81309040718471i
wofz(z)
#> [1] 0.056848372352677853-0.0044794129562583922i
(1 - erfz(-z*1i))*exp(-z^2)
#> [1] 0.0062105843486744616+0.015635039358870357i
# From Wolfram Alpha
0.056848372352677852 - 0.0044794129562583920i
#> [1] 0.056848372352677853-0.0044794129562583922i
However, for values for which the Faddeeva function returns very large real or imaginary components, wofz can error:
z <- -5.31444981258659 - 251.466791935272i
wofz(z)
#> Error in wofz(z): Error code from TOMS 680 was 1
(1 - erfz(-z*1i))*exp(-z^2)
#> [1] -Inf-Infi
# From Wolfram Alpha
-5.958323561e27450 - 4.818834377e27450i
#> [1] -Inf-Infi
Another option is the RcppFaddeeva package with the Faddeeva_w function:
install.packages("https://cran.r-project.org/src/contrib/Archive/RcppFaddeeva/RcppFaddeeva_0.2.2.tar.gz", repos = NULL, type = "source")
In case it helps, here is an algorithm I've used to fit the Voigt profile with a location parameter. First some sample data and a few constants:
library(NORMT3)
(seed <- sample(.Machine$integer.max, 1))
#> [1] 2132433003
set.seed(seed)
# sample from the Voigt profile with a location parameter (`mu`)
n <- 1e6L
mu <- rexp(1)/runif(1, -1, 1) # location parameter
sigma <- rexp(1)/runif(1) # Gaussian scale parameter
gamma <- rexp(1)/runif(1) # Cauchy scale parameter
x <- sort(rcauchy(n, 0, gamma) + rnorm(n, mu, sigma))
# constants
sqrt2 <- sqrt(2)
logpi <- log(pi)
log2 <- log(2)
medx <- median(x)
The algorithm first finds initial parameter guesses by fitting the empirical characteristic function.
T1 <- x - medx
f <- function(z) sum(cos(z*T1))/n
t0 <- 0.5
if (f(t0) > 0.01) {
while (f(t0 <- t0*2) > 0.01) {}
} else {
while (f(t0 <- t0/2) < 0.01) {}
t0 <- t0*2
}
f <- function(z) rowMeans(cos(outer(z, T1)))
tt <- 2^(seq(log2(t0), by = -1, length.out = 9))
yy <- f(tt)
f <- function(par) {
sum((exp(-((exp(par[1])*tt)^2/2 + exp(par[2])*abs(tt))) - yy)^2)
}
par0 <- `dim<-`(c(rep(-1:1, 3), rep(-1:1, each = 3)), c(9, 2))
bestfit <- optim(par0[1,], f, method = "BFGS")
for (i in 2:9) {
fit0 <- optim(par0[i,], f, method = "BFGS")
if (fit0$value < bestfit$value) bestfit <- fit0
}
par0 <- c(medx, bestfit$par)
Now the maximum likelihood estimation:
# negative log-likelihood function
fNLL <- function(par) {
t1 <- exp(par[2])*sqrt2
z <- complex(n, (x - par[1])/t1, exp(par[3])/t1)
out <- Inf
try(out <- n*((log2 + logpi)/2 + par[2]) - sum(log(Re(wofz(z)))), TRUE)
out
}
MLE <- optim(par0, fNLL, method = "BFGS")
c(mu = MLE$par[1], sigma = exp(MLE$par[2]), gamma = exp(MLE$par[3])) # estimates
#> mu sigma gamma
#> 10.8649753 0.6290871 0.1124877
c(mu = mu, sigma = sigma, gamma = gamma) # actual parameter values
#> mu sigma gamma
#> 10.8651009 0.6308690 0.1126534
options(digits = 16)
MLE$value # negative log-likelihood at MLE
#> [1] 1317177.021824002
fNLL(c(mu, log(c(sigma, gamma)))) # negative log-likelihood at actual parameters
#> [1] 1317181.468077655
NORMT3::wofz#include <Rcpp.h>
#include <complex>
#include <cmath>
using namespace Rcpp;
// [[Rcpp::export]]
ComplexVector wofz_cpp(const ComplexVector& zvec) {
const double FACTOR = 1.12837916709551257388; // 2/sqrt(pi)
const double RMAXREAL = 0.5e154;
const double RMAXEXP = 708.503061461606;
const double RMAXGONI = 3.53711887601422e15;
int n = zvec.size();
Rcomplex out;
ComplexVector result(n);
for (int idx = 0; idx < n; ++idx) {
Rcomplex z_in = zvec[idx];
double xi = z_in.r;
double yi = z_in.i;
double u = 0.0, v = 0.0;
bool flag = false;
double xabs = std::fabs(xi);
double yabs = std::fabs(yi);
double x = xabs / 6.3;
double y = yabs / 4.4;
// Protect against overflow in QRHO
if (xabs > RMAXREAL || yabs > RMAXREAL) {
flag = true;
}
if (!flag) {
double qrho = x*x + y*y;
double xabsq = xabs*xabs;
double xquad = xabsq - yabs*yabs;
double yquad = 2.0*xabs*yabs;
bool A = (qrho < 0.085264);
if (A) {
// Power-series expansion
qrho = (1.0 - 0.85*y)*std::sqrt(qrho);
int N = (int)std::llround(6.0 + 72.0*qrho);
int J = 2*N + 1;
double xsum = 1.0 / J;
double ysum = 0.0;
for (int i = N; i >= 1; --i) {
J -= 2;
double xaux = (xsum*xquad - ysum*yquad) / i;
ysum = (xsum*yquad + ysum*xquad) / i;
xsum = xaux + 1.0 / J;
}
double U1 = -FACTOR*(xsum*yabs + ysum*xabs) + 1.0;
double V1 = FACTOR*(xsum*xabs - ysum*yabs);
double daux = std::exp(-xquad);
double U2 = daux*std::cos(yquad);
double V2 = -daux*std::sin(yquad);
u = U1*U2 - V1*V2;
v = U1*V2 + V1*U2;
} else {
double H, H2 = 0.0;
int KAPN, NU;
if (qrho > 1.0) {
H = 0.0;
KAPN = 0;
double qrho_sqrt = std::sqrt(qrho);
NU = (int)std::floor(3.0 + (1442.0 / (26.0*qrho_sqrt + 77.0)));
} else {
qrho = (1.0 - y)*std::sqrt(1.0 - qrho);
H = 1.88*qrho;
H2 = 2.0*H;
KAPN = (int)std::llround(7.0 + 34.0*qrho);
NU = (int)std::llround(16.0 + 26.0*qrho);
}
bool B = (H > 0.0);
double qlambda = (B ? std::pow(H2, KAPN) : 0.0);
double rx = 0.0, ry = 0.0, sx = 0.0, sy = 0.0;
for (int n = NU; n >= 0; --n) {
int np1 = n + 1;
double tx = yabs + H + np1*rx;
double ty = xabs - np1*ry;
double C = 0.5 / (tx*tx + ty*ty);
rx = C*tx;
ry = C*ty;
if (B && n <= KAPN) {
double tx2 = qlambda + sx;
double sx_new = rx*tx2 - ry*sy;
double sy_new = ry*tx2 + rx*sy;
sx = sx_new;
sy = sy_new;
qlambda /= H2;
}
}
if (H == 0.0) {
u = FACTOR*rx;
v = FACTOR*ry;
} else {
u = FACTOR*sx;
v = FACTOR*sy;
}
if (yabs == 0.0)
u = std::exp(-xabs*xabs);
}
// Reflect into other quadrants
if (yi < 0.0) {
double U2, V2;
if (A) {
double daux = std::exp(-xquad);
U2 = 2.0*daux*std::cos(yquad);
V2 = -2.0*daux*std::sin(yquad);
} else {
xquad = -xquad;
if (yquad > RMAXGONI || xquad > RMAXEXP) {
flag = true;
} else {
double w1 = 2.0*std::exp(xquad);
U2 = w1*std::cos(yquad);
V2 = -w1*std::sin(yquad);
}
}
if (!flag) {
u = U2 - u;
v = V2 - v;
if (xi > 0.0) v = -v;
}
} else {
if (xi < 0.0) v = -v;
}
}
if (flag) {
// overflow or invalid
out.r = NA_REAL;
out.i = NA_REAL;
result[idx] = out;
} else {
out.r = u;
out.i = v;
}
result[idx] = out;
}
return result;
}
Running the same checks as before:
library(NORMT3)
library(Rcpp)
Rcpp::sourceCpp("C:/temp/wofz.cpp")
(seed <- sample(.Machine$integer.max, 1))
#> [1] 1741735938
set.seed(seed)
n <- 1e6
z <- runif(n, -1, 1) + runif(n, -1, 1)*1i
system.time(x <- wofz(z))
#> user system elapsed
#> 0.51 0.00 0.52
system.time(y <- wofz_cpp(z))
#> user system elapsed
#> 0.36 0.00 0.35
all.equal(x, y)
#> [1] TRUE
options(digits = 17)
z <- -0.781092052676692+9.81309040718471i
wofz(z)
#> [1] 0.056848372352677853-0.0044794129562583922i
wofz_cpp(z)
#> [1] 0.056848372352677853-0.0044794129562583922i
# From Wolfram Alpha
0.056848372352677852 - 0.0044794129562583920i
#> [1] 0.056848372352677853-0.0044794129562583922i
z <- -5.31444981258659 - 251.466791935272i
wofz(z)
#> Error in wofz(z): Error code from TOMS 680 was 1
wofz_cpp(z)
#> [1] NA
# From Wolfram Alpha
-5.958323561e27450 - 4.818834377e27450i
#> [1] -Inf-Infi
As a bonus, the Rcpp version is even faster than the Fortran version from NORMT3::wofz.