c string delimiter strtok

splitting a string up using different delimiters in C


I am trying to split up a string by using different delimiters. After hours of trial and error using strtok(), I have finally found a way to make it work. However, it uses NULLs in place of the given strings in strtok(), and I don't fully understand how it works.

I have tried to split it up so that I can save the parts in separate variables and use them to return functions within my main function, but it doesn't work, which leads me to believe that it is an incredibly flimsy way of splitting the string up.

the input string is read from a config file and is in this format:

(6,2) SLUG 1 0 EAST

the current code i'm using is this:

/*
 * Parses one config line of the form "(6,2) SLUG 1 0 EAST" in place and
 * prints the position, age and direction found in it.
 *
 * data is modified: strtok() overwrites delimiter characters with '\0'.
 * The second argument of strtok() is a set of delimiter characters, not a
 * substring. No strtok() result is checked for NULL before it is used.
 */
void createSlug(char* data) {
        int slugPosX, slugPosY, slugAge;
        char *slugDir;
        char *token1;
        char *token2;

        // The leading '(' is skipped as a delimiter. For the sample line no
        // other '(' follows, so the token is the rest of the line and atoi()
        // stops reading at the ','.
        slugPosX = atoi(strtok(data, "("));

        // Starts a new scan on data and ends the token at the first ','.
        // token1 is never read afterwards.
        token1 = strtok(data, ",");
        // The inner call returns what follows that ','; the outer call starts
        // a new scan on it and ends the token at ')'.
        slugPosY = atoi(strtok(strtok(NULL, ","), ")"));

        // The inner call continues behind the ')'; the outer call starts a
        // new scan on that remainder.
        token2 = strtok(strtok(NULL, ","), ")");
        // " SLUG " means the characters ' ', 'S', 'L', 'U' and 'G': they are
        // skipped as delimiters, so the token is the age that follows them.
        slugAge = atoi(strtok(token2, " SLUG "));

        // " 0 " means the characters ' ' and '0', so the "0" field is skipped
        // as delimiters and the token is the direction.
        slugDir = strtok(NULL, " 0 ");

        printf("slug position is: (%d,%d), with age %d, and direction: %s", slugPosX, slugPosY, slugAge, slugDir);

}

the output would be printed as:

slug position is: (6,2), with age 1, and direction: EAST

The input file changes but is always in the above format. It is also worth mentioning that the '0' in the input string is always 0, so I ignored that part of it, as I could use it as the delimiter.

Is there an easier way of doing this? I'm very very new to C so any help would be greatly appreciated


Solution

  • If you are sure that the characters '(', ',', ')' and ' ' are only used as delimiters and don't ever occur in the tokens, then you can simply use "(,) " as the delimiter string in all calls to strtok:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /*
     * Splits one line such as "(6,2) SLUG 1 0 EAST" in place and prints the
     * slug's position, age and direction. The tokens returned by strtok are
     * used without a NULL check.
     */
    void createSlug( char* data )
    {
        static const char separators[] = "(,) ";

        //the two coordinates come first
        char *xText = strtok( data, separators );
        int slugPosX = atoi( xText );
        char *yText = strtok( NULL, separators );
        int slugPosY = atoi( yText );

        //the "SLUG" keyword is read and dropped
        (void)strtok( NULL, separators );

        char *ageText = strtok( NULL, separators );
        int slugAge = atoi( ageText );

        //the "0" field is read and dropped
        (void)strtok( NULL, separators );

        char *slugDir = strtok( NULL, separators );

        printf( "slug position is: (%d,%d), with age %d, and direction: %s", slugPosX, slugPosY, slugAge, slugDir );
    }
    
    //Passes a sample line, held in a modifiable array, to createSlug.
    int main( void )
    {
        char sample[] = "(6,2) SLUG 1 0 EAST";

        createSlug( sample );
        return 0;
    }
    

    However, this program may crash if strtok ever returns NULL due to the input not being in the expected format. Here is a different version which does a lot more input validation and prints an error message instead of crashing:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /*
     * Parses one line of the form "(6,2) SLUG 1 0 EAST" in place and prints
     * the slug's position, age and direction to stdout.
     *
     * Each token is checked as soon as it is read. On the first problem a
     * message is written to stderr and the program exits with EXIT_FAILURE.
     */
    void createSlug( char* data )
    {
        static const char separators[] = "(,) ";
        char *field;
        char *numEnd;

        //token 1: x coordinate, nothing may follow the number
        field = strtok( data, separators );
        if ( !field )
        {
            fputs( "Unable to find first token!\n", stderr );
            exit( EXIT_FAILURE );
        }
        int slugPosX = strtol( field, &numEnd, 10 );
        if ( *numEnd )
        {
            fputs( "Unable to convert first token to an integer!\n", stderr );
            exit( EXIT_FAILURE );
        }

        //token 2: y coordinate, nothing may follow the number
        field = strtok( NULL, separators );
        if ( !field )
        {
            fputs( "Unable to find second token!\n", stderr );
            exit( EXIT_FAILURE );
        }
        int slugPosY = strtol( field, &numEnd, 10 );
        if ( *numEnd )
        {
            fputs( "Unable to convert second token to an integer!\n", stderr );
            exit( EXIT_FAILURE );
        }

        //token 3: must be exactly "SLUG"
        field = strtok( NULL, separators );
        if ( !field )
        {
            fputs( "Unable to find third token!\n", stderr );
            exit( EXIT_FAILURE );
        }
        if ( strcmp( field, "SLUG" ) )
        {
            fputs( "Invalid content of third token!\n", stderr );
            exit( EXIT_FAILURE );
        }

        //token 4: age, nothing may follow the number
        field = strtok( NULL, separators );
        if ( !field )
        {
            fputs( "Unable to find fourth token!\n", stderr );
            exit( EXIT_FAILURE );
        }
        int slugAge = strtol( field, &numEnd, 10 );
        if ( *numEnd )
        {
            fputs( "Unable to convert fourth token to an integer!\n", stderr );
            exit( EXIT_FAILURE );
        }

        //token 5: must be exactly "0"
        field = strtok( NULL, separators );
        if ( !field )
        {
            fputs( "Unable to find fifth token!\n", stderr );
            exit( EXIT_FAILURE );
        }
        if ( strcmp( field, "0" ) )
        {
            fputs( "Invalid content of fifth token!\n", stderr );
            exit( EXIT_FAILURE );
        }

        //token 6: direction, used as it is
        char *slugDir = strtok( NULL, separators );
        if ( !slugDir )
        {
            fputs( "Unable to find sixth token!\n", stderr );
            exit( EXIT_FAILURE );
        }

        printf( "slug position is: (%d,%d), with age %d, and direction: %s", slugPosX, slugPosY, slugAge, slugDir );
    }
    
    //Passes a sample line, held in a modifiable array, to createSlug.
    int main( void )
    {
        char sample[] = "(6,2) SLUG 1 0 EAST";

        createSlug( sample );
        return 0;
    }
    

    However, a significant amount of the code is now duplicated to a certain degree, so the code is not very maintainable.

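    Therefore, it may be better to use a more systematic approach, for example by first storing all tokens in an array and checking that all of them were found, then converting the integer tokens with a helper function, and finally verifying that the fixed tokens contain the expected values.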

    Here is an alternative solution which does this:

    #include <errno.h>
    #include <limits.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    #define NUM_TOKENS 6
    
    // Converts the string str to an int, reading it as a base-10 number.
    //
    // Returns true and stores the value in *result only if the whole
    // string is a number that fits in an int. Returns false and leaves
    // *result untouched if no digits were found, if characters follow the
    // number, or if the value is out of range.
    bool convert_token_to_int( char *str, int *result )
    {
        long num;
        char *p;

        //strtol reports an overflow of long only through errno
        errno = 0;
        num = strtol( str, &p, 10 );

        if ( p == str || *p != '\0' )
        {
            return false;
        }

        //reject values that strtol clamped or that do not fit in an int
        if ( errno == ERANGE || num < INT_MIN || num > INT_MAX )
        {
            return false;
        }

        *result = (int)num;
        return true;
    }
    
    // Parses one line of the form "(6,2) SLUG 1 0 EAST" in place and prints
    // the slug's position, age and direction to stdout.
    //
    // data is modified by strtok. If a token is missing, an integer token
    // cannot be converted, or a fixed token has an unexpected value, a
    // message is written to stderr and the program exits with EXIT_FAILURE.
    void createSlug( char* data )
    {
        char *tokens[NUM_TOKENS];
        int slugPosX, slugPosY, slugAge;
        char *slugDir;
        const char *delim = "(,) ";

        //store all tokens in the array tokens
        for ( int i = 0; i < NUM_TOKENS; i++ )
        {
            //only the first call is given the string itself
            tokens[i] = strtok( i == 0 ? data : NULL, delim );

            if ( tokens[i] == NULL )
            {
                //exit here as well, so that every kind of malformed
                //input is handled the same way
                fprintf( stderr, "Could not find token #%d!\n", i );
                exit( EXIT_FAILURE );
            }
        }

        //convert the integer tokens
        if (
            ! convert_token_to_int( tokens[0], &slugPosX )
            ||
            ! convert_token_to_int( tokens[1], &slugPosY )
            ||
            ! convert_token_to_int( tokens[3], &slugAge )
        )
        {
            fprintf( stderr, "Error converting tokens to integers!\n" );
            exit( EXIT_FAILURE );
        }

        //verify that non-variable tokens contain the
        //intended values
        if (
            strcmp( tokens[2], "SLUG" ) != 0
            ||
            strcmp( tokens[4], "0" ) != 0
        )
        {
            fprintf( stderr, "Non-variable tokens do not contain the intended values!\n" );
            exit( EXIT_FAILURE );
        }

        //make slugDir point to the appropriate token
        slugDir = tokens[5];

        printf( "slug position is: (%d,%d), with age %d, and direction: %s", slugPosX, slugPosY, slugAge, slugDir );
    }
    
    //Passes a sample line, held in a modifiable array, to createSlug.
    int main( void )
    {
        char sample[] = "(6,2) SLUG 1 0 EAST";

        createSlug( sample );
        return 0;
    }