I am fitting a Log Pearson III distribution to my streamflow data. After the fitting, I'd like to plot the observed values and the fitted distribution.
However, the figure is not what I expect:
1. The x axis label does not show 0.99 0.02, and 0.01, although I have set the breaks
to be c(0.99, 0.9, 0.5, 0.2, 0.1, 0.02, 0.01).
2. How to plot the fitted Log Pearson III distribution on the figure? I have provided an example figure for reference, thanks for any help.
library(fitdistrplus)
library(PearsonDS)
library(ggplot2)
low_flows <- data.frame(Year = 1981:2010, Flow = c(0.03357143, 0.01328571, 0.02285714, 0.02657143, 0.04957143, 0.04085714, 0.16900000, 0.01057143,
0.04128571, 0.10871429, 0.08771429, 0.09585714, 0.22057143, 0.11571428, 0.08300000, 0.11257143,
0.13614286, 0.07742857, 0.09785714, 0.04728571, 0.04300000, 0.08385714, 0.02828571, 0.07271429,
0.21428571, 0.10142857, 0.04400000, 0.10928571, 0.12471429, 0.31500000))
Log_mydata <- log10(low_flows$Flow)
m <- mean(Log_mydata)
v <- stats::var(Log_mydata)
g <- e1071::skewness(Log_mydata, type = 2)
my_shape <- (2/g)^2
my_scale <- sqrt(v)/sqrt(my_shape) * sign(g)
my_location <- m - my_scale * my_shape
start <- list(shape = my_shape, location = my_location, scale = my_scale)
dPIII <<- function(x, shape, location, scale) PearsonDS::dpearsonIII(x,
shape, location, scale, log = FALSE)
pPIII <<- function(q, shape, location, scale) PearsonDS::ppearsonIII(q,
shape, location, scale, lower.tail = TRUE, log.p = FALSE)
qPIII <<- function(p, shape, location, scale) PearsonDS::qpearsonIII(p,
shape, location, scale, lower.tail = TRUE, log.p = FALSE)
fit_lp3 <- fitdist(Log_mydata, distr = "PIII", start = start)
plotdata = low_flows
plotdata <- plotdata[order(plotdata$Flow), ]
plotdata$prob <- seq_len(length(plotdata$Flow))/(length(plotdata$Flow) + 1)
prob_scale_points = c(0.99, 0.9, 0.5, 0.2, 0.1, 0.02, 0.01)
ggplot(data = plotdata, aes(x = prob, y = Flow)) +
geom_point(size = 2) +
xlab("Probability") +
scale_x_continuous(transform = scales::probability_trans("norm", lower.tail = FALSE),
breaks = prob_scale_points,
sec.axis = dup_axis(name = "Return Period",
labels = function(x) {ifelse(1/x < 2, round(1/x, 2), round(1/x, 0))})) +
theme_bw() +
theme(panel.border = element_rect(colour = "black", fill = NA, linewidth = 1),
panel.grid = element_line(linewidth = 0.2),
axis.title = element_text(size = 12), axis.text = element_text(size = 10),
axis.title.x.top = element_text(size = 12),
legend.text = element_text(size = 10), legend.title = element_text(size = 10))
Here is what I get from the above code:
Here is my objective:
Following Allan's suggestion, the Log Pearson III can be correctly plotted! To simplify my original code, I have tried to use scale_x_log10
instead of scale_x_continuous
. However, the fitted line of Log Pearson III is not correctly plotted. I wonder how to fix this?
ggplot(data = plotdata, aes(x = prob, y = Flow)) +
geom_point(size = 2, color="blue") +
geom_function(fun = ~ 10^(qpearsonIII(.x, params = fit_lp3$estimate)),
color = "black", linewidth = 1.5, alpha = 0.7) +
scale_x_log10(name = "Probability",
trans = "reverse",
breaks = prob_scale_points,
limits = rev(range(prob_scale_points)),
sec.axis = dup_axis(name = "Return Period",
labels = function(x) {ifelse(1/x < 2, round(1/x, 2), round(1/x, 0))})) +
scale_y_log10(breaks = seq(0.05, 0.4, 0.05)) +
theme_bw() +
theme(panel.border = element_rect(colour = "black", fill = NA, linewidth = 1),
panel.grid = element_line(linewidth = 0.2),
axis.title = element_text(size = 12), axis.text = element_text(size = 10),
axis.title.x.top = element_text(size = 12),
legend.text = element_text(size = 10), legend.title = element_text(size = 10))
Here is the figure. We can see that the fitted line is not correct.
You can use geom_function
to draw the output of qPearsonIII
with your fit parameters, though you should plot the logged data to match its output. This would mean relabelling the y axis to make it appear as a log scale.
To get the x axis the way you want, include limits
as well as breaks
.
plotdata = low_flows
plotdata$logflow <- Log_mydata
plotdata <- plotdata[order(plotdata$Flow), ]
plotdata$prob <- seq_len(length(plotdata$Flow))/(length(plotdata$Flow) + 1)
ggplot(data = plotdata, aes(x = prob, y = logflow)) +
geom_point(size = 2, aes(color = "7-Day")) +
geom_function(fun = ~ qpearsonIII(.x, params = fit_lp3$estimate),
aes(color = "7-Day"), linewidth = 1, alpha = 0.8) +
scale_x_continuous("Probability",
transform = scales::probability_trans("norm",
lower.tail = FALSE),
breaks = prob_scale_points,
limits = rev(range(prob_scale_points)),
sec.axis = dup_axis(name = "Return Period",
labels = function(x) {
ifelse(1/x < 2, round(1/x, 2),
round(1/x, 0))})) +
scale_y_continuous("Discharge (cms)",
breaks = log10(seq(0.05, 0.4, 0.05)),
labels = ~10^.x,
limits = log10(c(0.01, 0.4))) +
scale_color_manual("Events and\nComputed Curve", values = "#440154") +
theme_bw(20) +
theme(panel.border = element_rect(colour = "black", fill = NA, linewidth = 1),
panel.grid = element_line(linewidth = 0.4),
axis.title = element_text(size = 12),
axis.text = element_text(size = 10),
axis.title.x.top = element_text(size = 12),
legend.text = element_text(size = 10),
legend.title = element_text(size = 10),
legend.position = "inside",
legend.position.inside = c(0.9, 0.9))