How to perform accurate FixedPoint Number Calculation in C?

How to perform accurate FixedPoint Number Calculation in C ??

I use the struct in C to store the integer part and the fraction part.

I guess the function is incorrect, maybe didn't handle the overflow properly, but I don't know how to modify it.

The input is like:

3.14123412345 + 0.23
5.6324 - 6.43252
8.112 * 1.31

My outputs is:

3.000000
-1.000000
0.000000

The outputs of my program are completely wrong, the precision of the numbers is lost when stored or calculated.

Here is my code:

#include<stdio.h>
#include<string.h>
#include<math.h>
#define maxn 110000
#define uint unsigned int
#define ull unsigned long long
typedef struct {
    int intgr;
    uint frac;
}PointFixedNum;
PointFixedNum a,b,ans;
char s1[maxn],s2[maxn],op;
double num1=0.0,num2=0.0;
void init(PointFixedNum *a,double num)
{
    a->intgr=(int)num;
    a->frac=(uint)((ull)(num-a->intgr)<<32);
}
void clear()
{
    a.frac=b.frac=ans.frac=0;
    a.frac=b.frac=ans.frac=0;
    num1=0.0;
    num2=0.0;
}
void read()
{
    scanf("%1100[^ ]",s1);
    scanf(" %c",&op);
    scanf("%1100s",s2);
    sscanf(s1,"%lf",&num1);
    sscanf(s2,"%lf",&num2);
}
PointFixedNum add(PointFixedNum a,PointFixedNum b)
{
    PointFixedNum c;
    ull sum=(ull)a.frac+b.frac;
    c.frac=(uint)(sum&((1ULL<<32)-1));
    c.intgr=a.intgr+b.intgr+(int)(sum>>32);
    return c;
}
PointFixedNum sub(PointFixedNum a,PointFixedNum b)
{
    PointFixedNum c;
    ull diff=(ull)a.frac-b.frac;
    c.frac=(uint)(diff&((1ULL<<32)-1));
    c.intgr=a.intgr-b.intgr-(int)(diff>>32);
}
PointFixedNum mul(PointFixedNum a,PointFixedNum b)
{
    PointFixedNum c;
    ull pro1=(ull)a.intgr*b.intgr;
    ull pro2=(ull)a.frac*b.frac;
    ull temp=(ull)((ull)pro1<<32)+(ull)pro2;
    c.intgr=(int)(temp>>32);
    c.frac=(uint)(temp&((1ULL<<32)-1));
}
void print(PointFixedNum a)
{
    double fraction=(double)a.frac/(1ULL<<32);
    double fin=(double)a.intgr+fraction;
    printf("%lf",fin);
}
int main()
{
    read();
    init(&a,num1);
    init(&b,num2);
    if(op=='+')
    {
        ans=add(a,b);
    }
    else if(op=='-')
    {
        ans=sub(a,b);
    }
    else if(op=='*')
    {
        ans=mul(a,b);
    }
    print(ans);
    return 0;
}

Updated:

I fixed some of the bugs in my program based on comments and answers, here is the result:

Inputs

3.14123412345 + 0.23
5.6324 - 6.43252
8.112 * 1.31

Outputs

3.37123412336
-0.80012000003
10.62671999843

So far, the functions of each function have been implemented, at least there should be no logical problem.

The only problem now is the precision, the add function will lost the precision after ten decimal places, as well as the sub function.

How to improve precision

Is there any way to improve the precision? Or the loss of precision is inevitable after ten decimal places?

If it is inevitable, since the process of the add and sub only relates to the shortest number, is there a way to keep the changeless part in the output?

for example: 3.14123412345 + 0.23 = 3.37123412345 , the first and second digit after decimal point (3.14) are added to 0.23 , but the 123412345 remained. Is there a way to keep it in the output directly?

About multiply

When the output result has a finite number of bits, the result is quite correct, so is there a way to limit the number of bits in the outputs to the same as the actual result?

for example: 8.112 * 1.31 = 10.62672 , if there is way to limit the output bits to 5, the output answer is correct.

This is my adjusted code:

#include<stdio.h>
#include<string.h>
#include<math.h>
#include<stdint.h>
#define maxn 110000
#define int int32_t
#define uint uint32_t
#define ull uint64_t
typedef struct {
    int intgr;
    uint frac;
}PointFixedNum;
PointFixedNum a,b,ans;
const ull scale=(1ULL<<32);
char s1[maxn],s2[maxn],op;
double num1=0.0,num2=0.0;
void init(PointFixedNum *a,double num)
{
    a->intgr=(int)num;
    a->frac=(uint)((num-a->intgr)*scale);
}
/*void init(PointFixedNum *a, double num) {
  static const long long scale = 1LL << 32;

  // Round to get y
  long long y = llround(num*scale);

  // Now break y into 2 parts:
  a->intgr = y/scale;
  int32_t frac = y%scale;
  if (frac >= 0) {
    a->frac = frac;
  } else {
    a->frac = scale + frac;
    a->intgr--;
  }
}*/
// I check this version and the result is the same.
void clear()
{
    a.frac=b.frac=ans.frac=0;
    a.frac=b.frac=ans.frac=0;
    num1=0.0;
    num2=0.0;
}
void read()
{
    scanf(" %1100[^ ]",s1);
    scanf(" %c",&op);
    scanf("%1100s",s2);
    sscanf(s1,"%lf",&num1);
    sscanf(s2,"%lf",&num2);
}
PointFixedNum add(PointFixedNum a,PointFixedNum b)
{
    PointFixedNum c;
    ull sum=(ull)a.frac+b.frac;
    c.frac=(uint)(sum%scale);
    c.intgr=a.intgr+b.intgr+(int)(sum/scale);
    return c;
}
PointFixedNum sub(PointFixedNum a,PointFixedNum b)
{
    PointFixedNum c;
    ull diff=(ull)a.frac-b.frac;
    c.frac=(uint)(diff%scale);
    c.intgr=a.intgr-b.intgr-(int)(diff/scale);
    return c;
}
PointFixedNum mul(PointFixedNum a,PointFixedNum b)
{
    PointFixedNum c={0,0};
    //ull scale=1ULL<<32;
    ull pro1=(ull)a.intgr*b.intgr;
    ull pro2=(ull)a.intgr*b.frac;
    ull pro3=(ull)a.frac*b.intgr;
    ull pro4=(ull)a.frac*b.frac;

    ull carry=pro4%scale>=scale/2;

    ull sum=pro4/scale+pro3%scale+pro2%scale+carry;
    c.frac=sum%scale;
    carry=sum/scale;

    sum=pro1%scale+pro2/scale+pro3/scale+carry;
    c.intgr=sum%scale;

    carry=sum/scale;
    sum=pro1/scale+carry;

    return c;
}
void print(PointFixedNum a)
{
    double fraction=(double)a.frac/scale;
    double fin=(double)a.intgr+fraction;
    printf("%.11lf",fin);
}
int main()
{
    read();
    init(&a,num1);
    init(&b,num2);
    if(op=='+')
    {
        ans=add(a,b);
    }
    else if(op=='-')
    {
        ans=sub(a,b);
    }
    else if(op=='*')
    {
        ans=mul(a,b);
    }
    print(ans);
    return 0;
}

Solution

Code has at least these problems:

Incomplete multiplication

mul(PointFixedNum a,PointFixedNum b) is missing the a.intgr*b.frac and *b.intgr*a.frac part.

Insufficient printing

printf("%lf",fin); prints a 6 digits fraction part, yet the fixed fraction is 1 part in 2³². Code should print at least 10 digits to the right of the decimal point: printf("%.10lf",fin);

Not checking scanf() results

Always check the return value of scanf() to see if reading succeeded.

scanf("%1100[^ ]",s1); should be scanf(" %1100[^ ]",s1); (add space) if read() is ever call again to consume any left over '\n' from prior input.

Too narrow an unsigned?

Instead of uint frac;, use uint32_t frac from <stdint.h> and avoid a dependency on unsigned size (unsigned may be 16-bit especially on embedded platforms).

Addition error

@Adrian McCarthy and @Gerhardh caught add/subtract issue.

init() deeper dive

Aside from the significant error in (ull)(num-a->intgr)<<32, better to round to the nearest value (using a round function) than a cast to an integer, which truncates.

Sample untested code to illustrate the idea.

#include <math.h>

void init(PointFixedNum *a, double num) {
  static const long long scale = 1LL << 32;

  // Round to get y
  long long y = llround(num*scale);

  // Now break y into 2 parts:
  a->intgr = y/scale;
  int32_t frac = y%scale;
  if (frac >= 0) {
    a->frac = frac;
  } else {
    a->frac = scale + frac;
    a->intgr--;
  }
}

Multiply

To multiply 2 positive values follows. OP will need to adjust things for mixed signed values.

Semi-pseudo-code

PointFixedNum mult(PointFixedNum a, PointFixedNum b) {
  ull scale = 1LL < 32;
  PointFixedNum c = 0;
  // Form the 4 64-bit products.
  ull pafbf = (ull)a.frac  * b.frac;  
  ull pafbi = (ull)a.frac  * b.ipart;
  ull paibf = (ull)a.ipart * b.frac;
  ull paibi = (ull)a.ipart * b.ipart;

  // These 4 products have a 32-bit most significant half
  // and a least 32-bit significant half.
  // Add these 8 32-bit values in columns of same significance.
  // Remember to carry. 
  //
  // paibi.upper paibi.lower
  //             paibf.upper paibf.lower
  //             pafbi.upper pafbi.lower
  //                         pafbf.upper pafbf.lower 
  // ===============================================
  // overflow        c.ipart     c.frac  

  // Round to form the first carry.
  ull carry = pafbf%scale >= scale/2;

  // Form the fraction.
  ull sum = pafbf/scale + pafbi%scale + paibf%scale + carry;
  c.frac = sum%scale;
  carry = sum / scale;

  // Form the integer part.
  sum = pafbi/scale + paibf/scale + paibi%scale + carry;
  c.ipart = sum%scale;
  carry = sum / scale;

  // Maybe form the overflow part.
  sum = paibi/scale + carry;
  // `sum` is the overflow part.
  // Perhaps do something special if it is non-zero as overflow occurred.

Advanced:
To round half-ways cases to nearest even, as done with common FP is trickier. Something like:

  // Form the fraction.
  ull sum = pafbf/scale + pafbi%scale + paibf%scale;
  if (sum & 1) {
    sum += pafbf%scale >= scale/2;
  } else {
    sum += pafbf%scale > scale/2;
  } 
  c.frac = sum%scale;
  carry = sum / scale;