I'm writing my own UNIX shell in C and I'm trying to add support for passing multi-word arguments within quotation marks (i.e. echo "This is a test"
). In my current function (parseCommandWords
) that you can see below, I'm successfully separating the words passed to the function via the input parameter and updating input appropriately through strsep()
. However, once the printf()
call runs and prints the correct values for word
and input
, a segmentation fault is thrown. It never reaches any of the if statements below the printf, adding anything below it, simply doesn't run. I'm failing to see what is causing the issue. Testing it with input = ls
for example (simple command), it prints word = ls | input = (null)
as you would expect.
The parsedWords
parameter is an array of NULL strings initially and the params are also validated before being passed to the function.
Update #1: The issue is almost definitely with the strcpy(parsedWords[i],word)
. Changing it to parsedWords[i] = word
doesn't cause the segfault but of course, it loses its value once we exit the function. I was able to pinpoint this using Valgrind when it notified me of an illegal read/write.
Update 2: I think the issue was with the way i was initializing my args
array of char* inside of parseInput
. Initializing each char* with NULL and then attempting to write at that location using strcpy should be what is causing the issue, correct? Allocating memory for each string dynamically like so fixes the issue:
char *args[MAX_NUM_OF_COMMAND_WORDS];
int i;
for(i=0; i < MAX_NUM_OF_COMMAND_WORDS; i++) {
args[i] = (char *)malloc(50*sizeof(char));
}
Full code:
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>
#include <string.h>
#include <stdlib.h>
#include "cs345sh.h"
/**
* Counts how many times the given char is present
* in the given string.
* @param input The string in which to look for
* @param lookupChar The char whose occurences to count
* @return The number of occurences of the given char
**/
int countCharOccurences(char *input, char lookupChar)
{
char *str = input;
int count = 0;
int i;
for (i = 0; str[i]; i++)
{
if (str[i] == lookupChar)
count++;
}
return count;
}
/**
* Parses the available command words in the given command and places
* them in the given array.
* @param input The initial string to split that contains the command.
* @param parsedWords The final parsed commands.
**/
void parseCommandWords(char *input, char **parsedWords)
{
int i;
for (i = 0; i < MAX_NUM_OF_COMMAND_WORDS; i++)
{
char *word = (char *)malloc(100 * sizeof(char)); // max 100 chars
if (!word)
{
perror("Failed to allocate memory!\n");
exit(EXIT_FAILURE);
}
if (input[0] == '\"')
{
char *inptPtr = input;
int charCnt = 0;
do
{
inptPtr++;
charCnt++;
} while (inptPtr[0] != '\"');
charCnt++; // include final "
strncpy(word, input, charCnt);
// check if there are chars left to parse or not
if (++inptPtr != NULL)
{
input = ++inptPtr; // start after the ending "
}
else
{
input = "";
}
printf("word after loop = %s\ninput = %s\n", word, input);
strcpy(parsedWords[i],word);
free(word);
continue;
}
word = strsep(&input, " ");
printf("word = %s | input = %s\n",word,input);
if (word == NULL)
{
free(word);
break; // there was nothing to split
}
if (strlen(word) == 0)
{
free(word);
i--; // read an empty command, re-iterate
continue;
}
printf("before cpy");
strcpy(parsedWords[i],word);
printf("word = %s | parsedwords[i] = %s\n",word,parsedWords[i]);
free(word);
if(input == NULL) break;
}
printf("exiting parser");
}
/**
* Parses the available commands in the given string and places
* them in the given array.
* @param input The initial string to split that contains the commands.
* @param parsedWords The final parsed commands.
**/
void parseMultipleCommands(char *input, char **parsedCommands)
{
int numOfSemicolons = countCharOccurences(input, ';');
int i;
for (i = 0; i < numOfSemicolons + 1; i++)
{
char *word = strsep(&input, ";");
if (word == NULL)
break;
if (strlen(word) == 0)
{
i--;
continue;
}
parsedCommands[i] = word;
}
}
char *removeLeadingWhitespace(char *input)
{
while (*input == ' ')
{
input++;
}
return input;
}
/**
* Splits the given string at each pipe char occurance and places
* each command in the given array.
* @param input The initial string to split
* @param inptParsed The final parsed commands split at the pipe chars
* @return Returns 0 if no pipe chars were found or 1 if the operatio was successful.
**/
int splitAtPipe(char *input, char **inptParsed)
{
int numOfPipes = countCharOccurences(input, '|');
int i;
// create a copy of the given input in order to preserver the original
char *inpt = (char *)malloc(MAX_INPUT_SIZE * sizeof(char));
strcpy(inpt, input);
for (i = 0; i < numOfPipes + 1; i++)
{
char *word = strsep(&inpt, "|");
if (word == NULL)
break;
if (strlen(word) == 0)
{
i--;
continue;
}
word = removeLeadingWhitespace(word);
inptParsed[i] = word;
}
return 1;
}
/**
* Handles the execution of custom commands (i.e. cd, exit).
* @param cmdInfo An array containing the command to execute in the first position, and the arguments
* to execute with in the rest of the array.
* @return Returns 0 if the command couldn't be executed, or 1 otherwise.
**/
int handleCustomCommands(char **cmdInfo)
{
int numOfCustomCommands = 2;
char *customCommands[numOfCustomCommands];
customCommands[0] = "cd";
customCommands[1] = "exit";
int i;
for (i = 0; i < numOfCustomCommands; i++)
{
// find the command to execute
if (strcmp(cmdInfo[0], customCommands[i]) == 0)
break;
}
switch (i)
{
case 0:
if (chdir(cmdInfo[1]) == -1)
return 0;
else
return 1;
case 1:
exit(0);
return 1;
default:
break;
}
return 0;
}
/**
* Displays the shell prompt in the following format:
* <user>@cs345sh/<dir>$
**/
void displayPrompt()
{
char *user = getlogin();
char cwd[512]; // support up to 512 chars long dir paths
if (getcwd(cwd, sizeof(cwd)) == NULL)
{
perror("error retrieving current working directory.");
exit(-1);
}
else if (user == NULL)
{
perror("error getting currently logged in user.");
exit(-1);
}
else
{
printf("%s@cs345%s$ ", user, cwd);
}
}
void execSystemCommand(char **args)
{
// create an identical child process
pid_t pid = fork();
if (pid == -1)
{
perror("\nFailed to fork child..");
exit(EXIT_FAILURE);
}
else if (pid == 0)
{
if (execvp(args[0], args) < 0)
{
perror("Could not execute given command..");
}
exit(EXIT_FAILURE);
}
else
{
// wait for the child process to finish
wait(NULL);
return;
}
}
void execPipedCommands(char *input, char **commands)
{
int numOfPipes = countCharOccurences(input, '|');
int fds[2 * numOfPipes]; // two file descriptors per pipe needed for interprocess communication
int i;
pid_t cpid;
// initialize all pipes and store their respective fds in the appropriate place in the array
for (i = 0; i < numOfPipes; i++)
{
if (pipe(fds + 2 * i) == -1)
{
perror("Failed to create file descriptors for pipe commands!\n");
exit(EXIT_FAILURE);
}
}
for (i = 0; i < numOfPipes + 1; i++)
{
if (commands[i] == NULL)
break;
char *args[MAX_NUM_OF_COMMAND_WORDS] = {
NULL,
};
parseCommandWords(commands[i], args);
cpid = fork(); // start a child process
if (cpid == -1)
{
perror("Failed to fork..\n");
exit(EXIT_FAILURE);
}
if (cpid == 0)
{ // child process is executing
if (i != 0)
{ // if this is not the first command in the chain
// duplicate the file descriptor to read from the previous command's output
if (dup2(fds[(i - 1) * 2], STDIN_FILENO) < 0)
{
perror("Failed to read input from previous command..\n");
exit(EXIT_FAILURE);
}
}
// if this is not the last command in the chain
if (i != numOfPipes && commands[i + 1] != NULL)
{
// duplicate write file descriptor in order to output to the next command
if (dup2(fds[(i * 2 + 1)], STDOUT_FILENO) < 0)
{
perror("Failed to write output for the next command..\n");
exit(EXIT_FAILURE);
}
}
// close the pipes
int j;
for (j = 0; j < numOfPipes + 1; j++)
{ // close all copies of the file descriptors
close(fds[j]);
}
// execute command
if (execvp(args[0], args) < 0)
{
perror("Failed to execute given piped command");
return;
}
}
}
// parent closes all original file descriptors
for (i = 0; i < numOfPipes + 1; i++)
{
close(fds[i]);
}
// parent waits for all child processes to finish
for (i = 0; i < numOfPipes + 1; i++)
wait(0);
}
void parseInput(char *input)
{
if (strchr(input, '|') != NULL)
{ // possibly piped command(s)
char *commands[MAX_NUM_OF_COMMANDS] = {
NULL,
};
splitAtPipe(input, commands);
execPipedCommands(input, commands);
}
else if (strchr(input, ';') != NULL)
{ // possibly multiple command(s)
char *commands[MAX_NUM_OF_COMMANDS] = {
NULL,
};
parseMultipleCommands(input, commands);
int i;
for (i = 0; i < MAX_NUM_OF_COMMANDS; i++)
{
if (commands[i] == NULL)
break;
// single command
char *args[MAX_NUM_OF_COMMAND_WORDS] = {
NULL,
};
parseCommandWords(commands[i], args);
if (handleCustomCommands(args) == 0)
{
execSystemCommand(args);
}
}
}
else
{
// single command
char *args[MAX_NUM_OF_COMMAND_WORDS] = {
NULL,
};
parseCommandWords(input, args);
printf("parsed! arg[0] = %s\n",args[0]);
if (handleCustomCommands(args) == 0)
{
execSystemCommand(args);
}
}
}
int main()
{
char *inputBuf = NULL; // getline will allocate the buffer
size_t inputLen = 0;
while (1)
{
displayPrompt();
if (getline(&inputBuf, &inputLen, stdin) == -1)
{
perror("Error reading input.");
exit(EXIT_FAILURE);
}
if (*inputBuf == '\n')
continue;
else
{
// remove the \n at the end of the read line ()
inputBuf[strcspn(inputBuf, "\n")] = '\0';
parseInput(inputBuf);
}
}
return 0;
}
Here's the minimum reproducible example:
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>
#include <string.h>
#include <stdlib.h>
#include "cs345sh.h"
/**
* Counts how many times the given char is present
* in the given string.
* @param input The string in which to look for
* @param lookupChar The char whose occurences to count
* @return The number of occurences of the given char
**/
int countCharOccurences(char *input, char lookupChar)
{
char *str = input;
int count = 0;
int i;
for (i = 0; str[i]; i++)
{
if (str[i] == lookupChar)
count++;
}
return count;
}
/**
* Parses the available command words in the given command and places
* them in the given array.
* @param input The initial string to split that contains the command.
* @param parsedWords The final parsed commands.
**/
void parseCommandWords(char *input, char **parsedWords)
{
int i;
for (i = 0; i < MAX_NUM_OF_COMMAND_WORDS; i++)
{
char *word = (char *)malloc(100 * sizeof(char)); // max 100 chars
if (!word)
{
perror("Failed to allocate memory!\n");
exit(EXIT_FAILURE);
}
if (input[0] == '\"')
{
char *inptPtr = input;
int charCnt = 0;
do
{
inptPtr++;
charCnt++;
} while (inptPtr[0] != '\"');
charCnt++; // include final "
strncpy(word, input, charCnt);
// check if there are chars left to parse or not
if (++inptPtr != NULL)
{
input = ++inptPtr; // start after the ending "
}
else
{
input = "";
}
printf("word after loop = %s\ninput = %s\n", word, input);
strcpy(parsedWords[i],word);
free(word);
continue;
}
word = strsep(&input, " ");
printf("word = %s | input = %s\n",word,input);
if (word == NULL)
{
free(word);
break; // there was nothing to split
}
if (strlen(word) == 0)
{
free(word);
i--; // read an empty command, re-iterate
continue;
}
printf("before cpy");
strcpy(parsedWords[i],word);
printf("word = %s | parsedwords[i] = %s\n",word,parsedWords[i]);
free(word);
if(input == NULL) break;
}
printf("exiting parser");
}
/**
* Handles the execution of custom commands (i.e. cd, exit).
* @param cmdInfo An array containing the command to execute in the first position, and the arguments
* to execute with in the rest of the array.
* @return Returns 0 if the command couldn't be executed, or 1 otherwise.
**/
int handleCustomCommands(char **cmdInfo)
{
int numOfCustomCommands = 2;
char *customCommands[numOfCustomCommands];
customCommands[0] = "cd";
customCommands[1] = "exit";
int i;
for (i = 0; i < numOfCustomCommands; i++)
{
// find the command to execute
if (strcmp(cmdInfo[0], customCommands[i]) == 0)
break;
}
switch (i)
{
case 0:
if (chdir(cmdInfo[1]) == -1)
return 0;
else
return 1;
case 1:
exit(0);
return 1;
default:
break;
}
return 0;
}
/**
* Displays the shell prompt in the following format:
* <user>@cs345sh/<dir>$
**/
void displayPrompt()
{
char *user = getlogin();
char cwd[512]; // support up to 512 chars long dir paths
if (getcwd(cwd, sizeof(cwd)) == NULL)
{
perror("error retrieving current working directory.");
exit(-1);
}
else if (user == NULL)
{
perror("error getting currently logged in user.");
exit(-1);
}
else
{
printf("%s@cs345%s$ ", user, cwd);
}
}
void execSystemCommand(char **args)
{
// create an identical child process
pid_t pid = fork();
if (pid == -1)
{
perror("\nFailed to fork child..");
exit(EXIT_FAILURE);
}
else if (pid == 0)
{
if (execvp(args[0], args) < 0)
{
perror("Could not execute given command..");
}
exit(EXIT_FAILURE);
}
else
{
// wait for the child process to finish
wait(NULL);
return;
}
}
void parseInput(char *input)
{
// single command
char *args[MAX_NUM_OF_COMMAND_WORDS] = {
NULL,
};
parseCommandWords(input, args);
printf("parsed! arg[0] = %s\n",args[0]);
if (handleCustomCommands(args) == 0)
{
execSystemCommand(args);
}
}
int main()
{
char *inputBuf = NULL; // getline will allocate the buffer
size_t inputLen = 0;
while (1)
{
displayPrompt();
if (getline(&inputBuf, &inputLen, stdin) == -1)
{
perror("Error reading input.");
exit(EXIT_FAILURE);
}
if (*inputBuf == '\n')
continue;
else
{
// remove the \n at the end of the read line ()
inputBuf[strcspn(inputBuf, "\n")] = '\0';
parseInput(inputBuf);
}
}
return 0;
}
Header file:
#define MAX_NUM_OF_COMMAND_WORDS 50 // usual num of maximum command arguments is 9 (but is system dependent)
#define MAX_NUM_OF_COMMANDS 20 // what could it be hmm
#define MAX_INPUT_SIZE 1000 // num of max chars to read
/**
* Counts how many times the given char is present
* in the given string.
* @param input The string in which to look for
* @param lookupChar The char whose occurences to count
* @return The number of occurences of the given char
**/
int countCharOccurences(char* input, char lookupChar);
/**
* Parses the available command words in the given command and places
* them in the given array.
* @param input The initial string to split that contains the command.
* @param parsedWords The final parsed commands.
**/
void parseCommandWords(char *input, char** parsedWords);
/**
* Parses the available commands in the given string and places
* them in the given array.
* @param input The initial string to split that contains the commands.
* @param parsedWords The final parsed commands.
**/
void parseMultipleCommands(char *input, char **parsedCommands);
/**
* Splits the given string at each pipe char and places
* each command in the given array.
* @param input The initial string to split
* @param inptParsed The final parsed commands split at the pipe chars
* @return Returns 0 if no pipe chars were found or 1 if the operation was successful.
**/
int splitAtPipe(char *input, char** inptParsed);
/**
* Handles the execution of custom commands (i.e. cd, exit).
* @param cmdInfo An array containing the command to execute in the first position, and the arguments
* to execute with in the rest of the array.
* @return Returns 0 if the command couldn't be executed, or 1 otherwise.
**/
int handleCustomCommands(char **command);
/**
* Displays the shell prompt in the following format:
* <user>@cs345sh/<dir>$
**/
void displayPrompt();
void execPipedCommands(char*, char**);
/**
* Removes any trailing whitespace from the given string
* and returns a pointer at the beginning of the new string.
* @param input The string to remove whitespace from
*/
char* removeLeadingWhitespace(char *input) ;
Using Valgrind I was able to pinpoint that the issue was due to me not allocating memory properly for my args
array and then trying to use that memory with strcpy
. More specifically:
When using char *args[MAX_NUM_OF_COMMAND_WORDS] = { NULL,}
I was not allocating memory for the arguments themselves, but for the pointers.
This lead to the segfault error since strcpy(parsedWords[i],word);
would then try to write to the invalid memory (since parsedWords[i]
would be NULL). I refactored the code so that I only allocate memory for the args that I need instead of mindlessly allocating memory for 50 args when I don't even know if I need that many. I then return the count of args found in the given command from the parseCommandWords()
function, which is then used to free the allocated memory.
Here's the changed code (updated to the final version on 9/11/21):
/**
* Parses the available command words in the given command and places
* them in the given array.
* @param input The initial string to split that contains the command.
* @param parsedWords An array to every command word.
* @return The number of words in the given command
**/
int parseCommandWords(char *input, char **parsedWords)
{
int i;
int cnt = 0;
for (i = 0; i < MAX_NUM_OF_COMMAND_WORDS; i++)
{
char word[MAX_NUM_OF_COMMAND_WORDS];
input = removeLeadingWhitespace(input);
if (strlen(input) == 0)
break;
if (input[0] == '\"')
{
char *inptPtr = input + 1; // start after the beginning " char
int charCnt = 0;
while (inptPtr[0] != '\"')
{
inptPtr++;
charCnt++;
}
if (charCnt >= MAX_NUM_OF_COMMAND_WORDS)
{
perror("Quoted argument was too long!\n");
exit(EXIT_FAILURE);
}
strncpy(word, input + 1, charCnt); // input+1 : start after the beginning " and charCnt: end before the closing "
word[charCnt] = '\0'; // add null terminator
// check if there are chars left to parse or not
if (strlen(++inptPtr) > 0)
{
input = inptPtr;
}
else
{
input = NULL;
}
parsedWords[i] = (char *)malloc(MAX_NUM_OF_COMMAND_WORDS * sizeof(char));
cnt++;
strcpy(parsedWords[i], word);
if (input == NULL || strlen(input) == 0)
return cnt;
else
continue;
}
strcpy(word, strsep(&input, " "));
if (word == NULL)
break; // nothing to split
if (strlen(word) == 0) // read an empty command, re-iterate
{
i--;
continue;
}
parsedWords[i] = (char *)malloc(MAX_NUM_OF_COMMAND_WORDS * sizeof(char));
if (!parsedWords[i])
{
perror("Failed to allocate memory for command\n");
exit(EXIT_FAILURE);
}
cnt++;
strcpy(parsedWords[i], word);
if (input == NULL || strlen(input) == 0)
break;
}
return cnt;
}
/**
* Executes the given commands after parsing them according to
* their type (i.e. pipes, redirection, etc.).
* @param input A line read from the shell containing commands to execute
* */
void parseInput(char *input)
{
if (strchr(input, '|') != NULL)
{
// possibly piped command(s)
char *commands[MAX_NUM_OF_COMMANDS] = {
NULL,
};
int numOfCmds = splitAtPipe(input, commands);
execPipedCommands(input, commands);
int i;
for (i = 0; i < numOfCmds; i++)
if (commands[i] != NULL)
free(commands[i]);
}
else if (strchr(input, '>') != NULL || strchr(input, '<') != NULL)
{ // no need to check for >> since we check for >
// redirection commands
char *commands[MAX_NUM_OF_REDIR_CMDS] = {
NULL,
};
char *delim = (char *)malloc(3 * sizeof(char));
if (strstr(input, ">>"))
strcpy(delim, ">>");
else if (strchr(input, '>'))
strcpy(delim, ">");
else
strcpy(delim, "<");
splitAtRedirectionDelim(input, commands, delim);
execRedirectionCommands(input, commands, delim);
int i;
for (i = 0; i < MAX_NUM_OF_REDIR_CMDS; i++)
if (commands[i] != NULL)
free(commands[i]);
free(delim);
}
else if (strchr(input, ';') != NULL)
{
// possibly multiple command(s)
char *commands[MAX_NUM_OF_COMMANDS] = {
NULL,
};
int numOfCmds = parseMultipleCommands(input, commands);
int i;
for (i = 0; i < numOfCmds; i++)
{
if (commands[i] == NULL)
break;
// single command
char *args[MAX_NUM_OF_COMMAND_WORDS] = {
NULL,
};
int numOfArgs = parseCommandWords(commands[i], args);
if (handleCustomCommands(args,numOfArgs,input) == 0)
{
execSystemCommand(args);
}
int j;
for (j = 0; j < numOfArgs; j++)
{
free(args[j]);
}
if (commands[i] != NULL)
free(commands[i]);
}
}
else
{
// single command
char *args[MAX_NUM_OF_COMMAND_WORDS] = {
NULL,
};
int numOfArgs = parseCommandWords(input, args);
if (handleCustomCommands(args,numOfArgs,input) == 0)
{
execSystemCommand(args);
}
int i;
for (i = 0; i < numOfArgs; i++)
{
free(args[i]);
}
}
}
As multiple people have noted, my code contains a lot of other issues as well (mem issues, the logic for handling quoted args is faulty, etc.), so I'm going to listen to them, take a step back and try to test everything in pieces before continuing.