ccsvfileparsing

How to parse and arrange lines of a csv file based on matching word in C?


I have a CSV file with the following format:

name,birthmonth,country,hobby
jack,jan,england,soccer
roben,july,germany,soccer
emma,dec,china,tennis
yannick,sep,france,music
alex,nov,england,cricket
thomas,apr,germany,tennis
mike,oct,netherlands,cycling
michelle,feb,france,poetry
yui,mar,japan,coding
feng,jun,china,reading

I want to parse this file using C and place all lines that share the same country name next to each other, as shown below:

name,birthmonth,country,hobby
jack,jan,england,soccer
alex,nov,england,cricket
roben,july,germany,soccer
thomas,apr,germany,tennis
emma,dec,china,tennis
feng,jun,china,reading
yannick,sep,france,music
michelle,feb,france,poetry
mike,oct,netherlands,cycling
yui,mar,japan,coding

So far I have tried the code below, but I am not able to split the fields properly and proceed further:

#include<stdio.h>
#include<stdlib.h>
#include<ctype.h>
#include<fcntl.h>
#include<string.h>

/*
 * Print every line of the CSV file named by argv[1] and, for each line that
 * has at least four comma-separated fields, the fourth field.
 *
 * Returns 0 on success and 1 if no file name was given or the file could not
 * be opened.
 */
int main (int argc, char **argv) {
    char line[200];
    /* Each field buffer is as large as the line, so no field can overflow it. */
    char name[200], birthmonth[200], country[200], hobby[200];
    FILE *input_csv_file;

    if (argc < 2) {
        printf("Usage: %s <input.csv>\n", argv[0]);
        return 1;
    }

    input_csv_file = fopen(argv[1], "r");
    if (input_csv_file == NULL) {
        printf("Can not open input file \n");
        return 1;
    }

    while (fgets(line, sizeof line, input_csv_file) != NULL) {
        /* fgets keeps the line ending; drop it so it does not end up in the last field. */
        line[strcspn(line, "\r\n")] = '\0';
        printf("line = %s\n", line);

        /*
         * "%s" reads up to the next whitespace and therefore swallows the
         * commas; the scanset "%[^,]" stops at a comma instead. The width
         * bounds each write, and the result must equal the number of
         * requested conversions before any field is used.
         */
        if (sscanf(line, "%199[^,],%199[^,],%199[^,],%199[^,]",
                   name, birthmonth, country, hobby) == 4) {
            printf("d=%s\n", hobby);
        }
    }

    fclose(input_csv_file);
    return 0;
}

I am a newbie in C/C++. Any help would be much appreciated. Thanks.


Solution

  • I managed to write code that groups the lines by country (it sorts the rows on the country column). Here it is:

    #include<stdio.h>
    #include<stdlib.h>
    #include<ctype.h>
    #include<fcntl.h>
    #include<string.h>
    
    enum {
        MAX_LINE = 256,      /* size of the line buffers, including the terminating NUL */
        COUNTRY_COLUMN = 2   /* zero-based index of the country column */
    };

    /* One data row of the input file. */
    struct filedata {
        char nation[MAX_LINE];   /* value of the country column */
        char content[MAX_LINE];  /* the whole line, without its line ending */
        size_t seq;              /* position in the input; tie-breaker that keeps the sort stable */
    };

    /*
     * Copy the zero-based index-th comma-separated field of line into out.
     * Empty fields count as fields. Returns 0 on success, -1 when the line has
     * too few fields or the field does not fit into out_size bytes.
     */
    static int get_field(const char *line, int index, char *out, size_t out_size)
    {
        const char *start = line;

        for (int i = 0; i < index; i++) {
            start = strchr(start, ',');
            if (start == NULL) {
                return -1;
            }
            start++;  /* step over the comma */
        }

        size_t len = strcspn(start, ",");
        if (len >= out_size) {
            return -1;
        }
        memcpy(out, start, len);
        out[len] = '\0';
        return 0;
    }

    /* qsort comparator: order by country name, then by original input position. */
    static int compare_records(const void *lhs, const void *rhs)
    {
        const struct filedata *a = lhs;
        const struct filedata *b = rhs;
        int by_nation = strcmp(a->nation, b->nation);

        if (by_nation != 0) {
            return by_nation;
        }
        return (a->seq > b->seq) - (a->seq < b->seq);
    }

    /*
     * Read the CSV file named by argv[1], sort its data rows by the country
     * column (rows of the same country keep their input order) and write the
     * header line followed by the sorted rows to "nation_csv.txt".
     *
     * The header is the line whose country column is literally "country".
     * Blank lines are ignored; lines without a country column are reported on
     * stderr and skipped.
     *
     * Returns EXIT_SUCCESS on success, EXIT_FAILURE on a usage, I/O or
     * allocation error.
     */
    int main(int argc, char **argv)
    {
        static const char country[] = "country";
        static const char output_name[] = "nation_csv.txt";

        if (argc < 2) {
            fprintf(stderr, "Usage: %s <input.csv>\n", argc > 0 ? argv[0] : "program");
            return EXIT_FAILURE;
        }

        FILE *input_csv_file = fopen(argv[1], "r");
        if (input_csv_file == NULL) {
            fprintf(stderr, "Can not open input file\n");
            return EXIT_FAILURE;
        }

        int rc = EXIT_FAILURE;
        FILE *fptr = NULL;
        struct filedata *records = NULL;
        size_t count = 0;
        size_t capacity = 0;
        char header_line[MAX_LINE] = "";
        int have_header = 0;
        char line[MAX_LINE];

        while (fgets(line, sizeof line, input_csv_file) != NULL) {
            size_t len = strcspn(line, "\r\n");

            /* A full buffer without a line ending means the line did not fit,
             * unless it is the final line of a file lacking a trailing newline. */
            if (line[len] == '\0' && len == sizeof line - 1) {
                if (getc(input_csv_file) != EOF) {
                    fprintf(stderr, "Input line too long (limit is %d characters)\n",
                            MAX_LINE - 2);
                    goto cleanup;
                }
            }
            line[len] = '\0';  /* strip the line ending; it is re-added on output */

            if (line[0] == '\0') {
                continue;
            }

            char nation[MAX_LINE];
            if (get_field(line, COUNTRY_COLUMN, nation, sizeof nation) != 0) {
                fprintf(stderr, "Skipping line without a country column: %s\n", line);
                continue;
            }

            /* The line carrying the column titles is kept aside, not sorted. */
            if (strcmp(nation, country) == 0) {
                snprintf(header_line, sizeof header_line, "%s", line);
                have_header = 1;
                continue;
            }

            /* Grow the array on demand instead of pre-counting lines. */
            if (count == capacity) {
                size_t new_capacity = capacity ? capacity * 2 : 16;
                if (new_capacity > (size_t)-1 / sizeof *records) {
                    fprintf(stderr, "Too many input lines\n");
                    goto cleanup;
                }
                struct filedata *grown = realloc(records, new_capacity * sizeof *records);
                if (grown == NULL) {
                    fprintf(stderr, "Out of memory\n");
                    goto cleanup;
                }
                records = grown;
                capacity = new_capacity;
            }

            snprintf(records[count].nation, sizeof records[count].nation, "%s", nation);
            snprintf(records[count].content, sizeof records[count].content, "%s", line);
            records[count].seq = count;
            count++;
        }

        if (ferror(input_csv_file)) {
            fprintf(stderr, "Error while reading the input file\n");
            goto cleanup;
        }

        if (count > 1) {
            qsort(records, count, sizeof *records, compare_records);
        }

        fptr = fopen(output_name, "w");
        if (fptr == NULL) {
            fprintf(stderr, "Error in opening the file to write\n");
            goto cleanup;
        }

        if (have_header) {
            fprintf(fptr, "%s\n", header_line);
        }
        for (size_t i = 0; i < count; i++) {
            fprintf(fptr, "%s\n", records[i].content);
        }

        if (ferror(fptr)) {
            fprintf(stderr, "Error while writing the output file\n");
            goto cleanup;
        }
        /* fclose flushes buffered output, so its result matters for a written file. */
        if (fclose(fptr) != 0) {
            fptr = NULL;
            fprintf(stderr, "Error while writing the output file\n");
            goto cleanup;
        }
        fptr = NULL;
        rc = EXIT_SUCCESS;

    cleanup:
        if (fptr != NULL) {
            fclose(fptr);
        }
        free(records);
        fclose(input_csv_file);
        return rc;
    }
    
    /*
     * Test two NUL-terminated strings for exact equality.
     * Returns 0 when a and b hold the same characters, -1 otherwise.
     */
    int compare_col(char a[], char b[])
    {
        return (strcmp(a, b) == 0) ? 0 : -1;
    }
    

    Thanks for all your input. Any further suggestions to improve it are much appreciated.

    Thanks