cprecisionnumerical-methodsrunge-kuttalong-double

Long double precision error saturation in RK integrator


I'm trying to write an integrator which uses long doubles for very high precision. I know that my system architecture has long double support, but for some reason, the precision of my integrator maxes out at 16 significant digits. Here's some code which recreates what I'm seeing. The integrator for this example was adapted from this source. In this test case, I am using it to calculate Euler's number (I apologize for the length of the code block but I can't recreate the behavior any other way):

// ld_int.c: a simple integrator using long doubles
#include <stdlib.h>
#include <stdio.h>
#include <math.h>

#define max(x,y) ( (x) < (y) ? (y) : (x) )
#define min(x,y) ( (x) < (y) ? (x) : (y) )

#define ATTEMPTS 50


static long double Runge_Kutta(long double (*f)(long double),
                               long double *y,
                               long double x,
                               long double h);

int Embedded_Verner_3_4(long double (*f)(long double),
                        long double y[],
                        long double x,
                        long double h,
                        long double xmax,
                        long double *h_next,
                        long double tolerance);

long double dydx(long double y);


// the only command-line argument to this is the desired precision.
// that is, if you run this with the argument -14, it will give you 
// an answer which is correct to 10^-14
////////////////////////////////////////////////////
int main(int argc, char *argv[]){
    long double y0[2] = {1.L, 2.L};
    long double x0 = 0.L;
    long double h = 1e-4L;
    long double xmax = 1L;
    long double tol = powl(10, atoi(argv[1]));
    
    Embedded_Verner_3_4(dydx, y0, x0, h, xmax, &h_next, tol)
    
    // compare a long-double value for E
    printf("expl(1) = %0.22LF\n", expl(1));
    
    // with the calculated value
    printf("eval    = %0.22LF\n", y0[1]);

    return(0);
}

// this test function just returns the input so it integrates e^x
long double dydx(long double y){
    return(y);
}

////////////////////////////////////////////////////////////
//  Arguments:                                                                //
//     double *f  Pointer to the function which returns the slope at (x,y) of //
//                integral curve of the differential equation y' = f(x,y)     //
//                which passes through the point (x0,y0) corresponding to the //
//                initial condition y(x0) = y0.                               //
//     double y[] On input y[0] is the initial value of y at x, on output     //
//                y[1] is the solution at xmax.                               //
//     double x   The initial value of x.                                     //
//     double h   Initial step size.                                          //
//     double xmax The endpoint of x.                                         //
//     double *h_next   A pointer to the estimated step size for successive   //
//                      calls to Runge_Kutta_Fehlberg.                        //
//     double tolerance The tolerance of y(xmax), i.e. a solution is sought   //
//                so that the relative error < tolerance.                     //
//                                                                            //
//  Return Values:                                                            //
//     0   The solution of y' = f(x,y) from x to xmax is stored y[1] and      //
//         h_next has the value to the next size to try.                      //
//    -1   The solution of y' = f(x,y) from x to xmax failed.                 //
//    -2   Failed because either xmax < x or the step size h <= 0.            //
////////////////////////////////////////////////////////////////////
int Embedded_Verner_3_4(long double (*f)(long double),
                        long double y[],
                        long double x,
                        long double h,
                        long double xmax,
                        long double *h_next,
                        long double tolerance){
   long double scale;
   long double temp_y[2];
   long double err;
   long double yy;
   int i;
   int last_interval = 0;
   long double MIN_SCALE_FACTOR = 0.125L;
   long double MAX_SCALE_FACTOR = 4.0L;

      // Verify that the step size is positive and that the upper endpoint //
      // of integration is greater than the initial enpoint.               //

   if (xmax < x || h <= 0.0) return -2;
       // If the upper endpoint of the independent variable agrees with the //
       // initial value of the independent variable.  Set the value of the  //
       // dependent variable and return success.                            //

   *h_next = h;
   y[1] = y[0];
   if (xmax == x) return 0; 
       // Insure that the step size h is not larger than the length of the //
       // integration interval.                                            //
  
   h = min(h, xmax - x);

        // Redefine the error tolerance to an error tolerance per unit    //
        // length of the integration interval.                            //

   tolerance /= (xmax - x);

        // Integrate the diff eq y'=f(x,y) from x=x to x=xmax trying to  //
        // maintain an error less than tolerance * (xmax-x) using an     //
        // initial step size of h and initial value: y = y[0]            //

   temp_y[0] = y[0];
   while (x < xmax){
      scale = 1.0L;
      for(i = 0; i < ATTEMPTS; i++){
         err = fabsl(Runge_Kutta(f, temp_y, x, h));
         if(err == 0.0L){
             scale = MAX_SCALE_FACTOR;
             break;
         }
         yy = (temp_y[0] == 0.0L)?
                tolerance:
                fabsl(temp_y[0]);

         scale = 0.8L * powl(tolerance * yy / err, 0.2L);
         scale = min(max(scale,MIN_SCALE_FACTOR), MAX_SCALE_FACTOR);    
         
         if(err < (tolerance * yy))
            break;
         
         h *= scale;
         
         if(x + h > xmax){
             h = xmax - x;
         }

         else if(x + h + 0.5L * h > xmax){
             h = 0.5L * h;
         }
        
      }
      
      if(i >= ATTEMPTS){
          printf("\nout of attempts. stats:\nerr = %LF, h = %LF, scale = %LF, x = %LF\n\n", log10l(err), log10l(h), scale, x);
          *h_next = h * scale;
          return -1;
      }
      
      temp_y[0] = temp_y[1];         
      x += h;
      h *= scale;
      *h_next = h;
      
      if(last_interval)
          break;
      
      if(x + h > xmax){
          last_interval = 1;
          h = xmax - x;
      }
      
      else if(x + h + 0.5 * h > xmax)
          h = 0.5 * h;
   }
   y[1] = temp_y[1];
   return 0;
}

static const long double a2 = 2.0 / 7.0;
static const long double a3 = 7.0 / 15.0;
static const long double a4 = 35.0 / 38.0;

static const long double b31 = 77.0 / 900.0;
static const long double b32 = 343.0 / 900.0;
static const long double b41 = 805.0 / 1444.0;
static const long double b42 = -77175.0 / 54872.0;
static const long double b43 = 97125.0 / 54872.0;
static const long double b51 = 79.0 / 490.0;
static const long double b53 = 2175.0 / 3626.0;
static const long double b54 = 2166.0 / 9065.0;

static const long double c1 = 229.0 / 1470.0;
static const long double c3 = 1125.0 / 1813.0;
static const long double c4 = 13718.0 / 81585.0;
static const long double c5 = 1.0 / 18.0;

static const long double d1 = -888.0 / 163170.0;
static const long double d3 = 3375.0 / 163170.0;
static const long double d4 = -11552.0 / 163170.0;
static const long double d5 = 9065.0 / 163170.0;

//////////////////////////////////////////////////////////
//  Arguments:                                                                //
//     double *f  Pointer to the function which returns the slope at (y) of //
//                integral curve of the differential equation y' = f(y)     //
//                which passes through the point (x0,y[0]).                   //
//     double y[] On input y[0] is the initial value of y at x, on output     //
//                y[1] is the solution at x + h.                              //
//     double x   Initial value of x.                                         //
//     double h   Step size                                                   //
//                                                                            //
//  Return Values:                                                            //
//     This routine returns the err / h.  The solution of y(x) at x + h is    //
//     returned in y[1].  
///////////////////////////////////////
static long double Runge_Kutta(long double (*f)(long double),
                               long double *y,
                               long double x0,
                               long double h){
       
    long double k1, k2, k3, k4, k5;
    long double h2 = a2 * h;

    k1 = (*f)(*y);
    k2 = (*f)(*y + h2 * k1);
    k3 = (*f)(*y + h * ( b31*k1 + b32*k2) );
    k4 = (*f)(*y + h * ( b41*k1 + b42*k2 + b43*k3) );
    k5 = (*f)(*y + h * ( b51*k1 + b53*k3 + b54*k4) );
    *(y+1) = *y +  h * (c1*k1 + c3*k3 + c4*k4 + c5*k5);
    return  d1*k1 + d3*k3 + d4*k4 + d5*k5;
}

I compile this code with

$ gcc -c ld_int.c
$ gcc ld_int.o -lm -o ldint

and running it with

$ ./ldint -16

will return

expl(1) = 2.7182818284590452354282
eval    = 2.7182818284590453085034

where the two values of E agree to the 16th decimal place. However, when I try to go to higher order, with ./ldt -17, for example, it suddenly can't converge. If you have the integrator print the error for every step, you see it hovers around 10^-16.8, but the timestep will go to 0 before the error ever gets below 10^-17. This sounds to me like it's still getting saturated at double precision, despite my doing my best to make everything in the code a long double.

Again, I apologize for the amount of code in the above block, but it really doesn't reproduce any other way that I can figure out. I wrote a test function to calculate E through its Taylor expansion instead, and it was completely happy to calculate it to 19 significant figures with long doubles. I can't figure out what's special about the integration case.


Solution

  • but for some reason, the precision of my integrator maxes out at 16 significant digits.

    At a minimum, use more correct values of long double initialization with long double quotients rather than double quotients.

    // long double a2 = 2.0 / 7.0;
    long double a2 = 2.0L / 7.0L;
    

    Like-wise for the other 19 initializations.