I have written visual basic code to determine the r square when fitting data using different order polynomials:
As an example, I have used the following data (Y = 2X^3 - 3X^2 + 4*X - 5):
All three methods fit the data (calculating the coefficients) correctly - it corresponds with the coefficients given by the trendlines on an Excel graph; however, the reported r square differs.
I get the following R-squared values:
Linear fitting:
2nd order polynomial:
3nd order polynomial:
Why is there such a difference? Why is R^2 bigger than one? How do I correctly calculate r square using the WorksheetFunction.LinEst function?
Here is my code:
Sub Data_regression()
Dim X() As Double ' X values
Dim Y() As Double ' Y values
Dim Z() As Variant ' Regression statistics
Dim number_datapoints As Integer
Dim i, r As Integer
' Insert data in columns A and B
' Column A contains X values
' Column B contains Y values
' First value should start in row 1
With ActiveSheet
' Get number of datapoints
r = 1
While .Cells(r + 1, 1).Text <> ""
r = r + 1
Wend
number_datapoints = r
' ********* LINEAR REGRESSION *********
' Define the ranges for X and Y data
ReDim X(number_datapoints - 1, 0)
ReDim Y(number_datapoints - 1, 0)
r = 1
For r = 1 To number_datapoints
i = r - 1
Y(i, 0) = .Cells(r, 2)
X(i, 0) = .Cells(r, 1)
Next r
' Perform regression using LINEST function
Z = WorksheetFunction.LinEst(Y, X, True, True)
' Write the results back to the worksheet
r = 1
.Cells(r, 4).Value = "y = m.x + c"
.Cells(r + 1, 4).Value = "slope (m):"
.Cells(r + 1, 5).Value = Z(1, 1)
.Cells(r + 2, 4).Value = "Intercept (c):"
.Cells(r + 2, 5).Value = Z(1, 2)
.Cells(r + 3, 4).Value = "R-squared:"
.Cells(r + 3, 5).Value = Z(2, 1)
' ********* QUADRATIC REGRESSION *********
ReDim Preserve X(number_datapoints - 1, 1)
For i = 0 To number_datapoints - 1
X(i, 1) = X(i, 0) ^ 2
Next i
' Perform regression using LINEST function
Z = WorksheetFunction.LinEst(Y, X, True, True)
' Write the results back to the worksheet
r = 6
.Cells(r, 4).Value = "y = a.x^2 + b.x + c"
.Cells(r + 1, 4).Value = "a:"
.Cells(r + 1, 5).Value = Z(1, 1)
.Cells(r + 2, 4).Value = "b:"
.Cells(r + 2, 5).Value = Z(1, 2)
.Cells(r + 3, 4).Value = "c:"
.Cells(r + 3, 5).Value = Z(1, 3)
.Cells(r + 4, 4).Value = "R-squared:"
.Cells(r + 4, 5).Value = Z(2, 1)
' ********* THIRD DEGREE POLINOMIAL REGRESSION *********
ReDim Preserve X(number_datapoints - 1, 2)
For i = 0 To number_datapoints - 1
X(i, 2) = X(i, 0) ^ 3
Next i
' Perform regression using LINEST function
Z = WorksheetFunction.LinEst(Y, X, True, True)
' Write the results back to the worksheet
r = 12
.Cells(r, 4).Value = "y = a.x^3 + b.x^2 + c.x + d"
.Cells(r + 1, 4).Value = "a:"
.Cells(r + 1, 5).Value = Z(1, 1)
.Cells(r + 2, 4).Value = "b:"
.Cells(r + 2, 5).Value = Z(1, 2)
.Cells(r + 3, 4).Value = "c:"
.Cells(r + 3, 5).Value = Z(1, 3)
.Cells(r + 4, 4).Value = "d:"
.Cells(r + 4, 5).Value = Z(1, 4)
.Cells(r + 5, 4).Value = "R-squared:"
.Cells(r + 5, 5).Value = Z(2, 1)
End With
End Sub
You read the wrong position from the array Z
: R^2
is in Z(3,1)
and not in Z(2,1)
.