I need to plot prediction and confidence intervals and need to use python and only the following packages. How do I plot both intervals in the same model. I managed to plot the prediction intervals with the help of ChatGPT. this is the code including the data set.
import statsmodels.api as sm
import statsmodels.formula.api as smf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#dataframe
data = {
'X': [55641, 55681, 55637, 55825, 55772, 55890, 56068, 56299, 56825, 57205,
57562, 57850, 57975, 57992, 58240, 58414, 58561, 59066, 58596, 58631,
58758, 59037],
'Y': [21886, 21934, 21699, 21901, 21812, 21714, 21932, 22086, 22265, 22551,
22736, 22301, 22518, 22580, 22618, 22890, 23112, 23315, 22865, 22788,
22949, 23149]
}
df = pd.DataFrame(data)
#OLS
model = smf.ols(formula='Y ~ X', data=df)
results = model.fit()
print(results.summary())
#calculating prediction intevals
predictions = results.get_prediction(df)
prediction_summary_frame = predictions.summary_frame(alpha=0.05)
#data points
plt.scatter(df['X'], df['Y'], color='black', label='Data')
#regression line
plt.plot(df['X'], results.fittedvalues, color='#58C9F4', label='Regression Line')
#prediction invterval
plt.fill_between(df['X'], prediction_summary_frame['obs_ci_lower'], prediction_summary_frame['obs_ci_upper'], color='grey', alpha=0.2, label='95% Prediction Interval')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Linear Regression with Prediction Intervals')
plt.legend()
plt.show()
How should I continue to be able to plot both intervals?
summary_frame
method provides both the prediction intervals (obs_ci_lower
, obs_ci_upper
) and the confidence intervals (mean_ci_lower
, mean_ci_upper
).fill_between
function is used twice to plot both intervals: once for the prediction interval and once for the confidence interval.import statsmodels.api as sm
import statsmodels.formula.api as smf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Dataframe
data = {
'X': [55641, 55681, 55637, 55825, 55772, 55890, 56068, 56299, 56825, 57205,
57562, 57850, 57975, 57992, 58240, 58414, 58561, 59066, 58596, 58631,
58758, 59037],
'Y': [21886, 21934, 21699, 21901, 21812, 21714, 21932, 22086, 22265, 22551,
22736, 22301, 22518, 22580, 22618, 22890, 23112, 23315, 22865, 22788,
22949, 23149]
}
df = pd.DataFrame(data)
# OLS
model = smf.ols(formula='Y ~ X', data=df)
results = model.fit()
print(results.summary())
# Calculating prediction intervals
predictions = results.get_prediction(df)
prediction_summary_frame = predictions.summary_frame(alpha=0.05)
# Calculating confidence intervals
confidence_intervals = results.conf_int(alpha=0.05)
# Data points
ax = df.plot(kind='scatter', x='X', y='Y', color='black', label='Data')
# Regression line
ax.plot(df['X'], results.fittedvalues, color='#58C9F4', label='Regression Line')
# Prediction interval
ax.fill_between(df['X'], prediction_summary_frame['obs_ci_lower'], prediction_summary_frame['obs_ci_upper'], color='grey', alpha=0.2, label='95% Prediction Interval')
# Confidence interval
ax.fill_between(df['X'], prediction_summary_frame['mean_ci_lower'], prediction_summary_frame['mean_ci_upper'], color='blue', alpha=0.2, label='95% Confidence Interval')
ax.set(xlabel='X', ylabel='Y', title='Linear Regression with Prediction and Confidence Intervals')
ax.legend()
plt.show()
OLS Regression Results
==============================================================================
Dep. Variable: Y R-squared: 0.919
Model: OLS Adj. R-squared: 0.915
Method: Least Squares F-statistic: 227.5
Date: Tue, 11 Jun 2024 Prob (F-statistic): 2.17e-12
Time: 06:30:58 Log-Likelihood: -140.06
No. Observations: 22 AIC: 284.1
Df Residuals: 20 BIC: 286.3
Df Model: 1
Covariance Type: nonrobust
==============================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------
Intercept 559.4600 1450.698 0.386 0.704 -2466.642 3585.562
X 0.3815 0.025 15.084 0.000 0.329 0.434
==============================================================================
Omnibus: 0.314 Durbin-Watson: 1.479
Prob(Omnibus): 0.855 Jarque-Bera (JB): 0.390
Skew: -0.242 Prob(JB): 0.823
Kurtosis: 2.562 Cond. No. 2.64e+06
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.64e+06. This might indicate that there are
strong multicollinearity or other numerical problems.
In the original code, the intercept at x = 0
is correct, but to visually represent it, the x-axis limits need to be expanded. Additionally, the slope appears distorted due to an unequal aspect ratio between the X and Y axes, causing a misrepresentation of the linear relationship.
# OLS
model = smf.ols(formula='Y ~ X', data=df)
results = model.fit()
# Extract coefficients
intercept = results.params['Intercept']
slope = results.params['X']
# Calculate regression line values
x_values = np.linspace(0, 59037, 100)
y_values = intercept + slope * x_values
# Calculating prediction intervals
predictions = results.get_prediction(df)
prediction_summary_frame = predictions.summary_frame(alpha=0.05)
# Calculating confidence intervals
confidence_intervals = results.conf_int(alpha=0.05)
# Data points
ax = df.plot(kind='scatter', x='X', y='Y', color='black', label='Data', s=1, figsize=(12, 10))
# Regression line
ax.plot(x_values, y_values, color='#58C9F4', label='Regression Line')
# Adding the point where the regression line intersects the y-axis
ax.scatter(0, intercept, color='red', label='Y-intercept (x=0)', s=2)
ax.annotate(f'(0, {intercept:.2f})', xy=(0, intercept), xytext=(2000, intercept + 1000), arrowprops=dict(facecolor='black', shrink=0.05))
# Annotating the regression line with the slope
ax.text(x_values[len(x_values)//2], y_values[len(y_values)//2], f'Slope: {slope:.2f}', fontsize=12, verticalalignment='bottom')
# Prediction interval
ax.fill_between(df['X'], prediction_summary_frame['obs_ci_lower'], prediction_summary_frame['obs_ci_upper'], color='grey', alpha=0.2, label='95% Prediction Interval')
# Confidence interval
ax.fill_between(df['X'], prediction_summary_frame['mean_ci_lower'], prediction_summary_frame['mean_ci_upper'], color='blue', alpha=0.2, label='95% Confidence Interval')
# Setting equal aspect ratio
ax.set_aspect('equal', 'box')
ax.set(xlabel='X', ylabel='Y', title='Linear Regression with Prediction and Confidence Intervals')
ax.legend(bbox_to_anchor=(1, 0.5), loc='center left', frameon=False)
plt.show()