cshellunixstrsep

Calling printf() after strsep() inside for loop causes a segfault


I'm writing my own UNIX shell in C and I'm trying to add support for passing multi-word arguments within quotation marks (i.e. echo "This is a test"). In my current function (parseCommandWords) that you can see below, I'm successfully separating the words passed to the function via the input parameter and updating input appropriately through strsep(). However, once the printf() call runs and prints the correct values for word and input, a segmentation fault is thrown. It never reaches any of the if statements below the printf, adding anything below it, simply doesn't run. I'm failing to see what is causing the issue. Testing it with input = ls for example (simple command), it prints word = ls | input = (null) as you would expect.

The parsedWords parameter is an array of NULL strings initially and the params are also validated before being passed to the function.

Update #1: The issue is almost definitely with the strcpy(parsedWords[i],word). Changing it to parsedWords[i] = word doesn't cause the segfault but of course, it loses its value once we exit the function. I was able to pinpoint this using Valgrind when it notified me of an illegal read/write.

Update 2: I think the issue was with the way i was initializing my args array of char* inside of parseInput. Initializing each char* with NULL and then attempting to write at that location using strcpy should be what is causing the issue, correct? Allocating memory for each string dynamically like so fixes the issue:

char *args[MAX_NUM_OF_COMMAND_WORDS];
int i;
for(i=0; i < MAX_NUM_OF_COMMAND_WORDS; i++) {
    args[i] = (char *)malloc(50*sizeof(char));
}

Full code:

#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>
#include <string.h>
#include <stdlib.h>
#include "cs345sh.h"

/**
 * Counts how many times the given char is present 
 * in the given string.
 * @param input The string in which to look for
 * @param lookupChar The char whose occurences to count
 * @return The number of occurences of the given char
 **/
int countCharOccurences(char *input, char lookupChar)
{
    char *str = input;
    int count = 0;
    int i;
    for (i = 0; str[i]; i++)
    {
        if (str[i] == lookupChar)
            count++;
    }
    return count;
}

/**
 * Parses the available command words in the given command and places
 * them in the given array.
 * @param input The initial string to split that contains the command.
 * @param parsedWords The final parsed commands.
 **/
void parseCommandWords(char *input, char **parsedWords)
{
    int i;
    for (i = 0; i < MAX_NUM_OF_COMMAND_WORDS; i++)
    {
        char *word = (char *)malloc(100 * sizeof(char)); // max 100 chars
        if (!word)
        {
            perror("Failed to allocate memory!\n");
            exit(EXIT_FAILURE);
        }
        if (input[0] == '\"')
        {
            char *inptPtr = input;
            int charCnt = 0;
            do
            {
                inptPtr++;
                charCnt++;
            } while (inptPtr[0] != '\"');
            charCnt++; // include final "
            strncpy(word, input, charCnt);
            // check if there are chars left to parse or not
            if (++inptPtr != NULL)
            {
                input = ++inptPtr; // start after the ending "
            }
            else
            {
                input = "";
            }
            printf("word after loop = %s\ninput = %s\n", word, input);
            strcpy(parsedWords[i],word);
            free(word);
            continue;
        }
        word = strsep(&input, " ");
        printf("word = %s | input = %s\n",word,input);
        if (word == NULL)
        {
            free(word);
            break; // there was nothing to split
        }
        if (strlen(word) == 0)
        {
            free(word);
            i--; // read an empty command, re-iterate
            continue;
        }
        printf("before cpy");
        strcpy(parsedWords[i],word);
        printf("word = %s | parsedwords[i] = %s\n",word,parsedWords[i]);
        free(word);

        if(input == NULL) break;
    }
    printf("exiting parser");
}

/**
 * Parses the available commands in the given string and places
 * them in the given array.
 * @param input The initial string to split that contains the commands.
 * @param parsedWords The final parsed commands.
 **/
void parseMultipleCommands(char *input, char **parsedCommands)
{
    int numOfSemicolons = countCharOccurences(input, ';');
    int i;
    for (i = 0; i < numOfSemicolons + 1; i++)
    {
        char *word = strsep(&input, ";");
        if (word == NULL)
            break;
        if (strlen(word) == 0)
        {
            i--;
            continue;
        }
        parsedCommands[i] = word;
    }
}

char *removeLeadingWhitespace(char *input)
{
    while (*input == ' ')
    {
        input++;
    }
    return input;
}

/**
 * Splits the given string at each pipe char occurance and places
 * each command in the given array.
 * @param input The initial string to split
 * @param inptParsed The final parsed commands split at the pipe chars
 * @return Returns 0 if no pipe chars were found or 1 if the operatio was successful.
 **/
int splitAtPipe(char *input, char **inptParsed)
{
    int numOfPipes = countCharOccurences(input, '|');
    int i;
    // create a copy of the given input in order to preserver the original
    char *inpt = (char *)malloc(MAX_INPUT_SIZE * sizeof(char));
    strcpy(inpt, input);
    for (i = 0; i < numOfPipes + 1; i++)
    {
        char *word = strsep(&inpt, "|");
        if (word == NULL)
            break;
        if (strlen(word) == 0)
        {
            i--;
            continue;
        }

        word = removeLeadingWhitespace(word);
        inptParsed[i] = word;
    }
    return 1;
}

/**
 * Handles the execution of custom commands (i.e. cd, exit).
 * @param cmdInfo An array containing the command to execute in the first position, and the arguments
 * to execute with in the rest of the array.
 * @return Returns 0 if the command couldn't be executed, or 1 otherwise.
 **/
int handleCustomCommands(char **cmdInfo)
{
    int numOfCustomCommands = 2;
    char *customCommands[numOfCustomCommands];
    customCommands[0] = "cd";
    customCommands[1] = "exit";
    int i;
    for (i = 0; i < numOfCustomCommands; i++)
    {
        // find the command to execute
        if (strcmp(cmdInfo[0], customCommands[i]) == 0)
            break;
    }

    switch (i)
    {
    case 0:
        if (chdir(cmdInfo[1]) == -1)
            return 0;
        else
            return 1;
    case 1:
        exit(0);
        return 1;
    default:
        break;
    }
    return 0;
}

/**
 * Displays the shell prompt in the following format:
 * <user>@cs345sh/<dir>$
 **/
void displayPrompt()
{
    char *user = getlogin();
    char cwd[512]; // support up to 512 chars long dir paths
    if (getcwd(cwd, sizeof(cwd)) == NULL)
    {
        perror("error retrieving current working directory.");
        exit(-1);
    }
    else if (user == NULL)
    {
        perror("error getting currently logged in user.");
        exit(-1);
    }
    else
    {
        printf("%s@cs345%s$ ", user, cwd);
    }
}

void execSystemCommand(char **args)
{
    // create an identical child process
    pid_t pid = fork();

    if (pid == -1)
    {
        perror("\nFailed to fork child..");
        exit(EXIT_FAILURE);
    }
    else if (pid == 0)
    {
        if (execvp(args[0], args) < 0)
        {
            perror("Could not execute given command..");
        }
        exit(EXIT_FAILURE);
    }
    else
    {
        // wait for the child process to finish
        wait(NULL);
        return;
    }
}

void execPipedCommands(char *input, char **commands)
{
    int numOfPipes = countCharOccurences(input, '|');
    int fds[2 * numOfPipes]; // two file descriptors per pipe needed for interprocess communication
    int i;
    pid_t cpid;

    // initialize all pipes and store their respective fds in the appropriate place in the array
    for (i = 0; i < numOfPipes; i++)
    {
        if (pipe(fds + 2 * i) == -1)
        {
            perror("Failed to create file descriptors for pipe commands!\n");
            exit(EXIT_FAILURE);
        }
    }

    for (i = 0; i < numOfPipes + 1; i++)
    {
        if (commands[i] == NULL)
            break;
        char *args[MAX_NUM_OF_COMMAND_WORDS] = {
            NULL,
        };
        parseCommandWords(commands[i], args);
        cpid = fork(); // start a child process
        if (cpid == -1)
        {
            perror("Failed to fork..\n");
            exit(EXIT_FAILURE);
        }

        if (cpid == 0)
        { // child process is executing
            if (i != 0)
            { // if this is not the first command in the chain
                // duplicate the file descriptor to read from the previous command's output
                if (dup2(fds[(i - 1) * 2], STDIN_FILENO) < 0)
                {
                    perror("Failed to read input from previous command..\n");
                    exit(EXIT_FAILURE);
                }
            }

            // if this is not the last command in the chain
            if (i != numOfPipes && commands[i + 1] != NULL)
            {
                // duplicate write file descriptor in order to output to the next command
                if (dup2(fds[(i * 2 + 1)], STDOUT_FILENO) < 0)
                {
                    perror("Failed to write output for the next command..\n");
                    exit(EXIT_FAILURE);
                }
            }

            // close the pipes
            int j;
            for (j = 0; j < numOfPipes + 1; j++)
            { // close all copies of the file descriptors
                close(fds[j]);
            }

            // execute command
            if (execvp(args[0], args) < 0)
            {
                perror("Failed to execute given piped command");
                return;
            }
        }
    }
    // parent closes all original file descriptors
    for (i = 0; i < numOfPipes + 1; i++)
    {
        close(fds[i]);
    }

    // parent waits for all child processes to finish
    for (i = 0; i < numOfPipes + 1; i++)
        wait(0);
}

void parseInput(char *input)
{
    if (strchr(input, '|') != NULL)
    { // possibly piped command(s)
        char *commands[MAX_NUM_OF_COMMANDS] = {
            NULL,
        };
        splitAtPipe(input, commands);
        execPipedCommands(input, commands);
    }
    else if (strchr(input, ';') != NULL)
    { // possibly multiple command(s)
        char *commands[MAX_NUM_OF_COMMANDS] = {
            NULL,
        };
        parseMultipleCommands(input, commands);
        int i;
        for (i = 0; i < MAX_NUM_OF_COMMANDS; i++)
        {
            if (commands[i] == NULL)
                break;
            // single command
            char *args[MAX_NUM_OF_COMMAND_WORDS] = {
                NULL,
            };
            parseCommandWords(commands[i], args);
            if (handleCustomCommands(args) == 0)
            {
                execSystemCommand(args);
            }
        }
    }
    else
    {
        // single command
        char *args[MAX_NUM_OF_COMMAND_WORDS] = {
            NULL,
        };
        parseCommandWords(input, args);
        printf("parsed! arg[0] = %s\n",args[0]);
        if (handleCustomCommands(args) == 0)
        {
            execSystemCommand(args);
        }
    }
}

int main()
{
    char *inputBuf = NULL; // getline will allocate the buffer
    size_t inputLen = 0;
    while (1)
    {
        displayPrompt();
        if (getline(&inputBuf, &inputLen, stdin) == -1)
        {
            perror("Error reading input.");
            exit(EXIT_FAILURE);
        }
        if (*inputBuf == '\n')
            continue;
        else
        {
            // remove the \n at the end of the read line ()
            inputBuf[strcspn(inputBuf, "\n")] = '\0';
            parseInput(inputBuf);
        }
    }
    return 0;
}

Here's the minimum reproducible example:

#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>
#include <string.h>
#include <stdlib.h>
#include "cs345sh.h"

/**
 * Counts how many times the given char is present 
 * in the given string.
 * @param input The string in which to look for
 * @param lookupChar The char whose occurences to count
 * @return The number of occurences of the given char
 **/
int countCharOccurences(char *input, char lookupChar)
{
    char *str = input;
    int count = 0;
    int i;
    for (i = 0; str[i]; i++)
    {
        if (str[i] == lookupChar)
            count++;
    }
    return count;
}

/**
 * Parses the available command words in the given command and places
 * them in the given array.
 * @param input The initial string to split that contains the command.
 * @param parsedWords The final parsed commands.
 **/
void parseCommandWords(char *input, char **parsedWords)
{
    int i;
    for (i = 0; i < MAX_NUM_OF_COMMAND_WORDS; i++)
    {
        char *word = (char *)malloc(100 * sizeof(char)); // max 100 chars
        if (!word)
        {
            perror("Failed to allocate memory!\n");
            exit(EXIT_FAILURE);
        }
        if (input[0] == '\"')
        {
            char *inptPtr = input;
            int charCnt = 0;
            do
            {
                inptPtr++;
                charCnt++;
            } while (inptPtr[0] != '\"');
            charCnt++; // include final "
            strncpy(word, input, charCnt);
            // check if there are chars left to parse or not
            if (++inptPtr != NULL)
            {
                input = ++inptPtr; // start after the ending "
            }
            else
            {
                input = "";
            }
            printf("word after loop = %s\ninput = %s\n", word, input);
            strcpy(parsedWords[i],word);
            free(word);
            continue;
        }
        word = strsep(&input, " ");
        printf("word = %s | input = %s\n",word,input);
        if (word == NULL)
        {
            free(word);
            break; // there was nothing to split
        }
        if (strlen(word) == 0)
        {
            free(word);
            i--; // read an empty command, re-iterate
            continue;
        }
        printf("before cpy");
        strcpy(parsedWords[i],word);
        printf("word = %s | parsedwords[i] = %s\n",word,parsedWords[i]);
        free(word);

        if(input == NULL) break;
    }
    printf("exiting parser");
}
 

/**
 * Handles the execution of custom commands (i.e. cd, exit).
 * @param cmdInfo An array containing the command to execute in the first position, and the arguments
 * to execute with in the rest of the array.
 * @return Returns 0 if the command couldn't be executed, or 1 otherwise.
 **/
int handleCustomCommands(char **cmdInfo)
{
    int numOfCustomCommands = 2;
    char *customCommands[numOfCustomCommands];
    customCommands[0] = "cd";
    customCommands[1] = "exit";
    int i;
    for (i = 0; i < numOfCustomCommands; i++)
    {
        // find the command to execute
        if (strcmp(cmdInfo[0], customCommands[i]) == 0)
            break;
    }

    switch (i)
    {
    case 0:
        if (chdir(cmdInfo[1]) == -1)
            return 0;
        else
            return 1;
    case 1:
        exit(0);
        return 1;
    default:
        break;
    }
    return 0;
}

/**
 * Displays the shell prompt in the following format:
 * <user>@cs345sh/<dir>$
 **/
void displayPrompt()
{
    char *user = getlogin();
    char cwd[512]; // support up to 512 chars long dir paths
    if (getcwd(cwd, sizeof(cwd)) == NULL)
    {
        perror("error retrieving current working directory.");
        exit(-1);
    }
    else if (user == NULL)
    {
        perror("error getting currently logged in user.");
        exit(-1);
    }
    else
    {
        printf("%s@cs345%s$ ", user, cwd);
    }
}

void execSystemCommand(char **args)
{
    // create an identical child process
    pid_t pid = fork();

    if (pid == -1)
    {
        perror("\nFailed to fork child..");
        exit(EXIT_FAILURE);
    }
    else if (pid == 0)
    {
        if (execvp(args[0], args) < 0)
        {
            perror("Could not execute given command..");
        }
        exit(EXIT_FAILURE);
    }
    else
    {
        // wait for the child process to finish
        wait(NULL);
        return;
    }
}

void parseInput(char *input)
{
        // single command
        char *args[MAX_NUM_OF_COMMAND_WORDS] = {
            NULL,
        };
        parseCommandWords(input, args);
        printf("parsed! arg[0] = %s\n",args[0]);
        if (handleCustomCommands(args) == 0)
        {
            execSystemCommand(args);
        }
}

int main()
{
    char *inputBuf = NULL; // getline will allocate the buffer
    size_t inputLen = 0;
    while (1)
    {
        displayPrompt();
        if (getline(&inputBuf, &inputLen, stdin) == -1)
        {
            perror("Error reading input.");
            exit(EXIT_FAILURE);
        }
        if (*inputBuf == '\n')
            continue;
        else
        {
            // remove the \n at the end of the read line ()
            inputBuf[strcspn(inputBuf, "\n")] = '\0';
            parseInput(inputBuf);
        }
    }
    return 0;
}

Header file:

#define MAX_NUM_OF_COMMAND_WORDS 50 // usual num of maximum command arguments is 9 (but is system dependent)
#define MAX_NUM_OF_COMMANDS 20 // what could it be hmm
#define MAX_INPUT_SIZE 1000 // num of max chars to read

/**
 * Counts how many times the given char is present 
 * in the given string.
 * @param input The string in which to look for
 * @param lookupChar The char whose occurences to count
 * @return The number of occurences of the given char
 **/
int countCharOccurences(char* input, char lookupChar);
/**
 * Parses the available command words in the given command and places
 * them in the given array.
 * @param input The initial string to split that contains the command.
 * @param parsedWords The final parsed commands.
 **/
void parseCommandWords(char *input, char** parsedWords);

/**
 * Parses the available commands in the given string and places
 * them in the given array.
 * @param input The initial string to split that contains the commands.
 * @param parsedWords The final parsed commands.
 **/
void parseMultipleCommands(char *input, char **parsedCommands);

/**
 * Splits the given string at each pipe char and places
 * each command in the given array.
 * @param input The initial string to split
 * @param inptParsed The final parsed commands split at the pipe chars
 * @return Returns 0 if no pipe chars were found or 1 if the operation was successful.
 **/
int splitAtPipe(char *input, char** inptParsed);

/**
 * Handles the execution of custom commands (i.e. cd, exit).
 * @param cmdInfo An array containing the command to execute in the first position, and the arguments
 * to execute with in the rest of the array.
 * @return Returns 0 if the command couldn't be executed, or 1 otherwise.
 **/ 
int handleCustomCommands(char **command);

/**
 * Displays the shell prompt in the following format:
 * <user>@cs345sh/<dir>$
 **/
void displayPrompt();

void execPipedCommands(char*, char**);

/**
 * Removes any trailing whitespace from the given string
 * and returns a pointer at the beginning of the new string.
 * @param input The string to remove whitespace from
 */
char* removeLeadingWhitespace(char *input) ;

Solution

  • Using Valgrind I was able to pinpoint that the issue was due to me not allocating memory properly for my args array and then trying to use that memory with strcpy. More specifically:

    When using char *args[MAX_NUM_OF_COMMAND_WORDS] = { NULL,} I was not allocating memory for the arguments themselves, but for the pointers. This lead to the segfault error since strcpy(parsedWords[i],word); would then try to write to the invalid memory (since parsedWords[i] would be NULL). I refactored the code so that I only allocate memory for the args that I need instead of mindlessly allocating memory for 50 args when I don't even know if I need that many. I then return the count of args found in the given command from the parseCommandWords() function, which is then used to free the allocated memory.

    Here's the changed code (updated to the final version on 9/11/21):

    /**
     * Parses the available command words in the given command and places
     * them in the given array.
     * @param input The initial string to split that contains the command.
     * @param parsedWords An array to every command word.
     * @return The number of words in the given command
     **/
    int parseCommandWords(char *input, char **parsedWords)
    {
        int i;
        int cnt = 0;
        for (i = 0; i < MAX_NUM_OF_COMMAND_WORDS; i++)
        {
            char word[MAX_NUM_OF_COMMAND_WORDS];
            input = removeLeadingWhitespace(input);
            if (strlen(input) == 0)
                break;
            if (input[0] == '\"')
            {
                char *inptPtr = input + 1; // start after the beginning " char
                int charCnt = 0;
                while (inptPtr[0] != '\"')
                {
                    inptPtr++;
                    charCnt++;
                }
                if (charCnt >= MAX_NUM_OF_COMMAND_WORDS)
                {
                    perror("Quoted argument was too long!\n");
                    exit(EXIT_FAILURE);
                }
                strncpy(word, input + 1, charCnt); // input+1 : start after the beginning " and charCnt: end before the closing "
                word[charCnt] = '\0';              // add null terminator
                // check if there are chars left to parse or not
                if (strlen(++inptPtr) > 0)
                {
                    input = inptPtr;
                }
                else
                {
                    input = NULL;
                }
                parsedWords[i] = (char *)malloc(MAX_NUM_OF_COMMAND_WORDS * sizeof(char));
                cnt++;
                strcpy(parsedWords[i], word);
                if (input == NULL || strlen(input) == 0)
                    return cnt;
                else
                    continue;
            }
            strcpy(word, strsep(&input, " "));
            if (word == NULL)
                break;             // nothing to split
            if (strlen(word) == 0) // read an empty command, re-iterate
            {
                i--;
                continue;
            }
            parsedWords[i] = (char *)malloc(MAX_NUM_OF_COMMAND_WORDS * sizeof(char));
            if (!parsedWords[i])
            {
                perror("Failed to allocate memory for command\n");
                exit(EXIT_FAILURE);
            }
            cnt++;
            strcpy(parsedWords[i], word);
            if (input == NULL || strlen(input) == 0)
                break;
        }
        return cnt;
    }
        
    /**
     * Executes the given commands after parsing them according to 
     * their type (i.e. pipes, redirection, etc.).
     * @param input A line read from the shell containing commands to execute
     * */
    void parseInput(char *input)
    {
        if (strchr(input, '|') != NULL)
        {
            // possibly piped command(s)
            char *commands[MAX_NUM_OF_COMMANDS] = {
                NULL,
            };
            int numOfCmds = splitAtPipe(input, commands);
            execPipedCommands(input, commands);
            int i;
            for (i = 0; i < numOfCmds; i++)
                if (commands[i] != NULL)
                    free(commands[i]);
        }
        else if (strchr(input, '>') != NULL || strchr(input, '<') != NULL)
        { // no need to check for >> since we check for >
            // redirection commands
            char *commands[MAX_NUM_OF_REDIR_CMDS] = {
                NULL,
            };
            char *delim = (char *)malloc(3 * sizeof(char));
    
            if (strstr(input, ">>"))
                strcpy(delim, ">>");
            else if (strchr(input, '>'))
                strcpy(delim, ">");
            else
                strcpy(delim, "<");
    
            splitAtRedirectionDelim(input, commands, delim);
            execRedirectionCommands(input, commands, delim);
    
            int i;
            for (i = 0; i < MAX_NUM_OF_REDIR_CMDS; i++)
                if (commands[i] != NULL)
                    free(commands[i]);
            free(delim);
        }
        else if (strchr(input, ';') != NULL)
        {
            // possibly multiple command(s)
            char *commands[MAX_NUM_OF_COMMANDS] = {
                NULL,
            };
            int numOfCmds = parseMultipleCommands(input, commands);
            int i;
            for (i = 0; i < numOfCmds; i++)
            {
                if (commands[i] == NULL)
                    break;
                // single command
                char *args[MAX_NUM_OF_COMMAND_WORDS] = {
                    NULL,
                };
                int numOfArgs = parseCommandWords(commands[i], args);
                if (handleCustomCommands(args,numOfArgs,input) == 0)
                {
                    execSystemCommand(args);
                }
                int j;
                for (j = 0; j < numOfArgs; j++)
                {
                    free(args[j]);
                }
                if (commands[i] != NULL)
                    free(commands[i]);
            }
        }
        else
        {
            // single command
            char *args[MAX_NUM_OF_COMMAND_WORDS] = {
                NULL,
            };
            int numOfArgs = parseCommandWords(input, args);
            if (handleCustomCommands(args,numOfArgs,input) == 0)
            {
                execSystemCommand(args);
            }
            int i;
            for (i = 0; i < numOfArgs; i++)
            {
                free(args[i]);
            }
        }
    }
    

    As multiple people have noted, my code contains a lot of other issues as well (mem issues, the logic for handling quoted args is faulty, etc.), so I'm going to listen to them, take a step back and try to test everything in pieces before continuing.