Tags: c, csv, strtok, file, parsing

Cannot properly count elements in CSV after strtok in C


I only sometimes see the correct number of elements after I use strtok to split a line on commas and quotation marks (double quotes). Usually the printf reports a length of 0, but occasionally it reports 6 or 1, with no change to the code.

I have tried only using one delimiter (commas) and defining the token output of strtok in different ways, and rearranging the order of statements in the while loop where the other elements of the row are supposedly delimited. These are a few lines of the .csv file I am using to test my code (test.csv). It is the same format that NOAA provides precipitation data in as a .csv.

"STATION","NAME","DATE","PRCP","PRCP_ATTRIBUTES"
"US183459384","XYZ ABC 9.0 E, WA US","2019-01-06","0.65",",,N"
"US183459384","XYZ ABC 9.0 E, WA US","2019-01-12","0.46",",,N"
"US183459384","XYZ ABC 9.0 E, WA US","2019-01-13","0.09",",,N"
"US183459384","XYZ ABC 9.0 E, WA US","2019-01-14","0.01",",,N"
"US183459384","XYZ ABC 9.0 E, WA US","2019-01-15","0.60",",,N"
"US183459384","XYZ ABC 9.0 E, WA US","2019-01-16","1.93",",,N"

And my code attempt is below.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BUFFER_SIZE 1024

/*
 * Reads `file` line by line with fgets() and, when the zero-based line index
 * equals `row_num`, splits that line with strtok() using the characters in
 * `delim`, printing each token on its own line.
 *
 * Returns 0 both when the row is found and when it is not.
 *
 * NOTE(review): as written this function does not compile — the braces are
 * unbalanced (see the note at the `else` branch).
 */
int get_row(FILE *file, int row_num, char delim[]) {
    int n_line = 0;
    int field = 0;
    char row[BUFFER_SIZE], *line[BUFFER_SIZE];

    while (fgets(row, BUFFER_SIZE, file)) {
        if (n_line == row_num) {
            /*
             * Prints strlen(row): the number of characters in the line
             * (including any trailing newline fgets() kept), not the number
             * of fields. strlen() returns size_t, so %ld is also the wrong
             * conversion; %zu is the matching one.
             */
            printf("Length of line %d is %ld elements!\n", n_line, strlen(row));
            char* element = strtok(row, delim);
            while (element != NULL) {
                printf("%s\n", element);
                /* Each token is duplicated into line[]; the copies are never
                 * read or freed in this function. `field` ends up holding the
                 * token count but is never printed. */
                line[field++] = strdup(element);
                element = strtok(NULL, delim);
            }
            return 0;
        } else {
          n_line++;
    /* NOTE(review): this brace closes the `else`, not the `while`; one
     * closing brace is missing, so the function body is never terminated. */
    }
    printf("There is no row %d in the file you selected.\n", row_num);
    return 0;
}

/*
 * Opens test.csv, asks get_row() to print row 0 (the first line of the file),
 * and closes the file. Returns -1 if the file cannot be opened, 0 otherwise.
 */
int main(int argc, char **argv) {
    FILE *file;
    /* Separator set: comma, space and double quote. Fields that themselves
     * contain commas or spaces are therefore split into several tokens. */
    char delim[]  = ", \"";

    /* NOTE(review): the file name is wrapped in typographic quotes (U+201D)
     * rather than ASCII double quotes, so this is not a valid string literal. */
    file = fopen(”test.csv”, "r");
    if (!file) {
        /* NOTE(review): file_name is not declared anywhere in this snippet. */
        printf("Error: could not open %s\n", file_name);
        return -1;
    }
    printf("Reading file...\n");
    get_row(file, 0, delim);
    fclose(file);
    return 0;
}

I expect the result to show 5 but the result is 0 or 1 for all lines and occasionally 6.


Solution

  • This program should not compile, because file_name is not defined. Also, inside the get_row function the number of elements should be taken from field, not from the length of the buffer (strlen(row)). Finally, using the comma as a delimiter will not work, because some of the fields themselves contain commas. The following code works for the given lines of the test.csv file:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    #define BUFFER_SIZE 1024
    
    /*
     * Print the fields of one row of a quoted CSV file, one per line, followed
     * by the number of fields found.
     *
     * file     Stream opened for reading; it is consumed from its current
     *          position up to and including the requested row.
     * row_num  Zero-based index of the row to print.
     * delim    Characters handed to strtok() as separators. The caller passes
     *          "\"" so that commas inside quoted fields are preserved.
     *
     * Returns 0 both when the row was found and when it was not; in the latter
     * case a message saying so is printed.
     *
     * Limitations: fgets() reads at most BUFFER_SIZE - 1 characters per call, so
     * a longer physical line is seen as several rows. strtok() merges adjacent
     * delimiters, so an empty quoted field ("") is not counted.
     */
    int get_row(FILE *file, int row_num, char delim[]) {
        int n_line = 0;
        char row[BUFFER_SIZE];

        while (fgets(row, sizeof row, file)) {
            if (n_line != row_num) {
                n_line++;
                continue;
            }

            /* Strip the trailing line terminator (LF or CRLF) up front so it
             * can never be mistaken for a field. */
            size_t len = strlen(row);
            while (len > 0 && (row[len - 1] == '\n' || row[len - 1] == '\r')) {
                row[--len] = '\0';
            }

            int field = 0;
            for (char *element = strtok(row, delim); element != NULL;
                 element = strtok(NULL, delim)) {
                /* With '"' as the separator, the lone comma between two quoted
                 * fields comes back as a token of its own; it is not data. */
                if (strcmp(",", element) == 0) {
                    continue;
                }
                printf("%s\n", element);
                field++;
            }
            printf("Length of line %d is %d elements!\n", n_line, field);
            return 0;
        }
        printf("There is no row %d in the file you selected.\n", row_num);
        return 0;
    }
    
    /*
     * Entry point: print the fields of row 0 (the first line) of a CSV file.
     *
     * The file defaults to "test.csv" in the current directory; an optional
     * first command-line argument names a different file.
     *
     * Returns 0 on success and EXIT_FAILURE if the file cannot be opened.
     */
    int main(int argc, char **argv) {
        /* Splitting on the quote character alone keeps commas that occur
         * inside quoted fields intact. */
        char delim[] = "\"";
        const char *file_name = (argc > 1) ? argv[1] : "test.csv";

        FILE *file = fopen(file_name, "r");
        if (!file) {
            /* Diagnostics belong on stderr so they do not mix with the data. */
            fprintf(stderr, "Error: could not open %s\n", file_name);
            return EXIT_FAILURE;
        }
        printf("Reading file...\n");
        get_row(file, 0, delim);
        fclose(file);
        return 0;
    }