Determine if provided coordinates represent a polygon or an ellipse

I have a string of coordinates, read from a JSON file, which belongs to a certain ROI, drawn by the user of an application. I am supposed to eventually determine if the coordinates represent a polygon or an ellipse (AKA did the user paint a polygon or an ellipse). I have a serious problem of figuring out the correct algorithm, since it is also a math-related problem. The code have written so far, recognizes an ellipse, but then considers a polygon also an ellipse. My initial idea was to put all the coords inside the ellipse equation and check if it matches the characteristics of an ellipse. But somehow I fail to differentiate it from the polygon.

Example for the format of the coordinates that I retrieve from the JSON file:

Ellipse coordinates:

M/322.504/80.6014;L/322.3/84.7773;L/321.684/88.899;L/319.253/96.9595;L/315.292/104.742;L/309.881/112.205;L/303.102/119.309;L/295.036/126.012;L/285.763/132.273;L/275.364/138.052;L/263.921/143.307;L/251.515/147.997;L/238.226/152.082;L/224.136/155.521;L/209.325/158.273;L/193.875/160.297;L/177.866/161.551;L/161.38/161.996;L/144.892/161.603;L/128.88/160.399;L/113.423/158.423;L/98.6038/155.718;L/84.5029/152.323;L/71.2013/148.28;L/58.7804/143.628;L/47.3212/138.409;L/36.9047/132.663;L/27.6122/126.431;L/19.5247/119.753;L/12.7234/112.671;L/7.28933/105.224;L/3.30368/97.4543;L/0.847538/89.4014;L/0.218384/85.2816;L/0.00202717/81.1064;L/0.205307/76.9305;L/0.821556/72.8088;L/3.25246/64.7483;L/7.21376/56.9658;L/12.6245/49.5023;L/19.4036/42.3987;L/27.4701/35.6959;L/36.7431/29.4347;L/47.1415/23.6562;L/58.5843/18.4012;L/70.9906/13.7107;L/84.2794/9.62544;L/98.3697/6.1865;L/113.18/3.43473;L/128.631/1.41106;L/144.639/0.156398;L/161.126/-0.288348;L/177.613/0.104763;L/193.626/1.30929;L/209.083/3.28456;L/223.902/5.98993;L/238.003/9.38472;L/251.304/13.4283;L/263.725/18.08;L/275.185/23.2991;L/285.601/29.045;L/294.894/35.2771;L/302.981/41.9547;L/309.782/49.037;L/315.216/56.4835;L/319.202/64.2535;L/321.658/72.3064;L/322.287/76.4262;L/322.504/80.6014

Polygon coordinates:

M/0.0102565/69.1651;L/19.019/51.4713;L/19.6427/5.25438;L/111.389/0.385824;L/112.778/24.1719;L/288.807/24.6385;L/288.255/129.985;L/242.72/131.399;L/221.009/162.01;L/138.096/166.188;L/116.982/128.833;L/113.55/100.971;L/65.9781/103.747;L/48.9573/79.3007;L/1.3638/64.406

So, the program is supposed to determine that the first set of coordinates belong to an ellipse, and the second one to a polygon.

Note: the number of coordinates are never the same, since it is fully up to the user, how the polygon should look like, therefore it is not safe to say that the length of the coordinates from a polygon will also be fewer than an ellipse.

I thank you guys in advance and hope to be able to find a solution for this problem.

My current code:


#include <iostream>
#include <vector>
#include <sstream>
#include <cmath>

// Structure to hold coordinate data
struct Coordinate {
    double x;
    double y;
};

// Function to check if coordinates satisfy the ellipse equation
bool isEllipse(const std::vector<Coordinate>& coordinates) {
    // Number of coordinates
    int numPoints = coordinates.size();

    // Calculate the centroid
    double sumX = 0.0, sumY = 0.0;
    for (const auto& coord : coordinates) {
        sumX += coord.x;
        sumY += coord.y;
    }
    double centerX = sumX / numPoints;
    double centerY = sumY / numPoints;

    // Calculate the major and minor axes
    double maxDistSqr = 0.0;
    for (const auto& coord : coordinates) {
        double dx = coord.x - centerX;
        double dy = coord.y - centerY;
        double distSqr = dx * dx + dy * dy;
        if (distSqr > maxDistSqr) {
            maxDistSqr = distSqr;
        }
    }
    double a = std::sqrt(maxDistSqr);
    double b = std::sqrt(maxDistSqr);

    // Check if the coordinates satisfy the ellipse equation
    for (const auto& coord : coordinates) {
        double dx = coord.x - centerX;
        double dy = coord.y - centerY;
        double ellipseResult = (dx * dx) / (a * a) + (dy * dy) / (b * b);
        if (ellipseResult > 1.0) {
            return false;
        }
    }

    return true;
}

int main() {
    std::string coordinatesStr = "M/-0.247283/512.418;L/166.935/505.209;L/175.478/415.698;L/141.124/384.968;L/207.244/354.6;L/211.621/292.758;L/269.472/283.712;L/259.446/10.8997;L/773.612/-0.156929;L/773.644/277.548;L/850.632/280.289;L/850.638/335.28;L/927.629/368.266;L/886.392/423.262;L/894.646/470.004;L/1007.98/479.435;L/1015.16/565.507;L/20.5376/572.793;L/-0.0663959/512.529;L/0.189167/513.04";

    std::vector<Coordinate> coordinates;

    // Parse the coordinate string
    std::istringstream iss(coordinatesStr);
    std::string segment;
    while (std::getline(iss, segment, ';')) {
        // Parse M or L coordinates
        if (segment[0] == 'M' || segment[0] == 'L') {
            std::istringstream coordIss(segment.substr(2));
            std::string xStr, yStr;
            if (std::getline(coordIss, xStr, '/') && std::getline(coordIss, yStr, '/')) {
                try {
                    double x = std::stod(xStr);
                    double y = std::stod(yStr);
                    coordinates.push_back({ x, y });
                } catch (const std::invalid_argument& e) {
                    std::cerr << "Failed to parse coordinate: " << segment << std::endl;
                }
            }
        }
    }

    // Check if the coordinates satisfy the ellipse equation
    bool isAnEllipse = isEllipse(coordinates);

    if (isAnEllipse) {
        std::cout << "The coordinates form an ellipse." << std::endl;
    } else {
        std::cout << "The coordinates do not form an ellipse." << std::endl;
    }

    return 0;
}

Solution

Well, I guess it comes down to fitting the best ellipse (e.g. by least squares) and then deciding whether the (normalised) error is less than some pre-set tolerance.

The general ellipse (which is NOT necessarily aligned with the coordinate axes) is ax²+bxy+cy²+dx+ey+f=0, with b²-4ac<0.

Since the coefficient of x², a, can't be zero for an ellipse you can divide through by this, or, equivalently, take a=1. You then need to fit the free parameters b, c, d, e, f which you can do by minimising the squared error

sum(over all points) (ax²+bxy+cy²+dx+ey+f)²

Differentiating w.r.t. each parameter in turn you get five linear equations for the five parameters. (If you want an ellipse with axes parallel to the coordinate axes then take b=0 and reduce the number of parameters to 4.)

This gives the best-fit ellipse, but to decide whether your set of points fits it adequately you need to look at the sum-of-squared-errors (or, rather, that normalised, since it would otherwise depend on the units for x and y).

With a tolerance of 0.1%, your first example fits an ellipse and your second doesn't.

#include <iostream>
#include <vector>
#include <utility>
#include <cmath>
using namespace std;

struct Pt{ double x, y; };

bool isEllipse( const vector<Pt> &S );
bool solve( vector<vector<double>> A, vector<double> B, vector<double> &X );

//======================================================================

int main()
{
   bool result;

   vector<Pt> case1 = {
{322.504,80.6014}, {322.3,84.7773}, {321.684,88.899}, {319.253,96.9595}, {315.292,104.742},
{309.881,112.205}, {303.102,119.309}, {295.036,126.012}, {285.763,132.273}, {275.364,138.052}, 
{263.921,143.307}, {251.515,147.997}, {238.226,152.082}, {224.136,155.521}, 
{209.325,158.273}, {193.875,160.297}, {177.866,161.551}, {161.38,161.996}, 
{144.892,161.603}, {128.88,160.399}, {113.423,158.423}, {98.6038,155.718}, 
{84.5029,152.323}, {71.2013,148.28}, {58.7804,143.628}, {47.3212,138.409}, 
{36.9047,132.663}, {27.6122,126.431}, {19.5247,119.753}, {12.7234,112.671}, 
{7.28933,105.224}, {3.30368,97.4543}, {0.847538,89.4014}, {0.218384,85.2816}, 
{0.00202717,81.1064}, {0.205307,76.9305}, {0.821556,72.8088}, {3.25246,64.7483}, 
{7.21376,56.9658}, {12.6245,49.5023}, {19.4036,42.3987}, {27.4701,35.6959}, 
{36.7431,29.4347}, {47.1415,23.6562}, {58.5843,18.4012}, {70.9906,13.7107}, 
{84.2794,9.62544}, {98.3697,6.1865}, {113.18,3.43473}, {128.631,1.41106}, 
{144.639,0.156398}, {161.126,-0.288348}, {177.613,0.104763}, {193.626,1.30929}, 
{209.083,3.28456}, {223.902,5.98993}, {238.003,9.38472}, {251.304,13.4283}, 
{263.725,18.08}, {275.185,23.2991}, {285.601,29.045}, {294.894,35.2771}, 
{302.981,41.9547}, {309.782,49.037}, {315.216,56.4835}, {319.202,64.2535}, 
{321.658,72.3064}, {322.287,76.4262}, {322.504,80.6014} };
   result = isEllipse( case1 );
   cout << boolalpha << result << "\n\n\n";

   vector<Pt> case2 = {
{0.0102565,69.1651}, {19.019,51.4713}, {19.6427,5.25438}, {111.389,0.385824}, {112.778,24.1719}, {288.807,24.6385}, 
{288.255,129.985}, {242.72,131.399}, {221.009,162.01}, {138.096,166.188}, {116.982,128.833}, {113.55,100.971}, {65.9781,103.747}, 
{48.9573,79.3007}, {1.3638,64.406 } };
   result = isEllipse( case2 );
   cout << boolalpha << result << "\n\n\n";
}

//=====================================================================

// Fit best ellipse of form    x^2 + bxy + cy^2 + dx + ey + f = 0
bool isEllipse( const vector<Pt> &S )
{
   const double EPS = 1.0e-3;                    // tolerance on normalised squared error
   int N = 5;                                    // number of parameters (b, c, d, e, f )
   if ( S.size() < N ) return false;             // too small; any ellipse wouldn't be unique

   vector<vector<double>> M( N, vector<double>( N ) );     // N equations for N parameters
   vector<double> rhs( N ), coeff( N );
   for ( Pt p : S )
   {
      double x = p.x, y = p.y;
      vector<double> w = { x * y, y * y, x, y, 1 };        // weights on the parameters
      for ( int i = 0; i < N; i++ )
      {
         for ( int j = 0; j < N; j++ ) M[i][j] += w[i] * w[j];
         rhs[i] -= w[i] * x * x;
      }
   }
   if ( !solve( M, rhs, coeff ) ) return false;            // solve for parameters

   double a = 1.0, b = coeff[0], c = coeff[1], d = coeff[2], e = coeff[3], f = coeff[4];
   cout << "a, b, c, d, e, f = " << a << "  " << b << "  " << c << "  " << d << "  " << e << "  " << f << '\n';
   if ( b * b - 4.0 * a * c >= 0.0 ) return false;         // Wrong conic section

   double sumEsq = 0.0, sumRsq = 0.0;
   for ( Pt p : S )
   {
      double x = p.x, y = p.y;
      vector<double> w = { x * y, y * y, x, y, 1 };
      double error = x * x, rsq = x * x + y * y;
      for ( int j = 0; j < N; j++ ) error += w[j] * coeff[j];
      sumEsq += error * error;
      sumRsq += rsq   * rsq  ;
   }
   sumEsq /= sumRsq;
   cout << "Normalised squared error = " << sumEsq << '\n';
   return sumEsq < EPS;
}

//======================================================================

bool solve( vector<vector<double>> A, vector<double> B, vector<double> &X )
// Solve AX = B by Gaussian elimination
// Note: copies deliberately made of A and B through passing by value
{
   const double SMALL = 1.0e-10;
   int n = A.size();

   // Row operations for i = 0, ,,,, n - 2 (n-1 not needed)
   for ( int i = 0; i < n - 1; i++ )
   {
      // Pivot: find row r below with largest element in column i
      int r = i;
      double maxA = abs( A[i][i] );
      for ( int k = i + 1; k < n; k++ )
      {
         double val = abs( A[k][i] );
         if ( val > maxA )
         {
            r = k;
            maxA = val;
         }
      }
      if ( r != i )
      {
         for ( int j = i; j < n; j++ ) swap( A[i][j], A[r][j] );
         swap( B[i], B[r] );
      }

      // Row operations to make upper-triangular
      double pivot = A[i][i];
      if ( abs( pivot ) < SMALL ) return false;            // Singular matrix

      for ( int r = i + 1; r < n; r++ )                    // On lower rows
      {
         double multiple = A[r][i] / pivot;                // Multiple of row i to clear element in ith column
         for ( int j = i; j < n; j++ ) A[r][j] -= multiple * A[i][j];
         B[r] -= multiple * B[i];
      }
   }

   // Check last row
   if ( abs( A[n-1][n-1] ) < SMALL ) return false;         // Singular matrix

   // Back-substitute
   X = B;
   for ( int i = n - 1; i >= 0; i-- )
   {
      for ( int j = i + 1; j < n; j++ )  X[i] -= A[i][j] * X[j];
      X[i] /= A[i][i];
   }

   return true;
}

Output:

a, b, c, d, e, f = 1  0.00923995  3.94893  -323.252  -640.062  25930.5
Normalised squared error = 6.12962e-09
true


a, b, c, d, e, f = 1  -0.67009  1.80408  -264.498  -137.44  9896.96
Normalised squared error = 0.0197783
false