c regex operating-system pipe system-calls

How to correctly parse system call execution times from strace output using pipes in C?

I'm working on an operating system programming assignment to understand pipes. M3.md The task involves using regular expressions to extract information from strace output and statistic system call execution times.

However, I'm encountering issues where the times are not being correctly calculated. I suspect the parse_strace_line function is failing to properly parse the strace output lines, especially the timestamp part (e.g., <0.000123>), leading to incorrect or missing time values in the statistics. The function might not be correctly handling the line format, edge cases like incomplete lines, or the regex pattern might not accurately capture the time component from the strace output.

My environment is Ubuntu running on WSL2.

Here is the code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>
#include <regex.h>

#define MAX_SYSCALLS 1024  // Maximum distinct syscalls we can track
#define TOP_N 5            // Number of top results to display

/* Syscall tracking structure
 * name: syscall name (e.g., "open", "read")
 * time: accumulated time spent in this syscall (seconds)
 */
typedef struct {
    char name[64];
    double time;
} syscall_stat;

/* Main statistics container
 * stats: array of individual syscall metrics
 * count: number of unique syscalls tracked
 * total_time: sum of all syscall times for percentage calculation
 */
typedef struct {
    syscall_stat stats[MAX_SYSCALLS];
    int count;
    double total_time;
} syscall_stats;

/* Parse a line of strace output with timing information (-T option)
 * line: input string from strace output
 * syscall_name: output parameter for extracted syscall name
 * time: output parameter for extracted duration
 * Returns: 1 if successful, 0 if line doesn't contain timing info
 * 
 * Strace line format examples:
 * openat(AT_FDCWD, "/lib/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 <0.000014>
 * read(0, "hello\n", 1024)               = 6 <0.000010>
 */
int parse_strace_line(char* line, char* syscall_name, double* time) {
    // Filter lines without timing information
    if (strstr(line, " = ") == NULL || strstr(line, "<") == NULL) {
        return 0;
    }

    // Regular expression pattern breakdown:
    // ^(\w+)       Capture group 1: syscall name at line start
    // \(.*?\)      Non-greedy match of arguments between parentheses
    // \s+=\s+.*?   Match return value portion
    // <(\d+\.?\d*)> Capture group 2: time value between <>
    regex_t regex;
    regcomp(&regex, "^(\\w+)\\(.*?\\)\\s+=\\s+.*?<(\\d+\\.?\\d*)>$", REG_EXTENDED);
    regmatch_t matches[3];  // Array for regex capture groups

    if (regexec(&regex, line, 3, matches, 0) != 0) {
        regfree(&regex);
        return 0;
    }

    // Extract syscall name from first capture group
    int name_len = matches[1].rm_eo - matches[1].rm_so;
    strncpy(syscall_name, line + matches[1].rm_so, name_len);
    syscall_name[name_len] = '\0';

    // Convert time string to double
    char* time_str = line + matches[2].rm_so;
    *time = atof(time_str);

    regfree(&regex);
    return 1;
}

/* Update statistics with new syscall data
 * stats: pointer to statistics container
 * name: syscall name to add/update
 * time: duration to accumulate
 * 
 * If the syscall already exists, we sum the time.
 * If new, we add it to the list (until MAX_SYSCALLS).
 */
void add_syscall(syscall_stats *stats, const char *name, double time) {
    // Safety check against array overflow
    if (stats->count >= MAX_SYSCALLS) {
        fprintf(stderr, "Warning: Reached maximum tracked syscalls (%d)\n", MAX_SYSCALLS);
        return;
    }

    // Check existing entries for duplicate syscall
    for (int i = 0; i < stats->count; i++) {
        if (strcmp(stats->stats[i].name, name) == 0) {
            stats->stats[i].time += time;
            stats->total_time += time;
            return;
        }
    }

    // Add new entry if space available
    if (stats->count < MAX_SYSCALLS) {
        strncpy(stats->stats[stats->count].name, name, sizeof(stats->stats[0].name));
        stats->stats[stats->count].time = time;
        stats->total_time += time;
        stats->count++;
    }
}

/* Display top N syscalls by accumulated time
 * stats: statistics container with all data
 * n: number of top results to show
 * 
 * Uses bubble sort for simplicity with small datasets.
 * Prints percentages relative to total tracked time.
 * Flushes output with nulls for clean pipe termination.
 */
void print_top_syscalls(syscall_stats *stats, int n) {
    if (stats->count == 0 || stats->total_time == 0) {
        fprintf(stderr, "Error: No valid syscall data to display\n");
        return;
    }

    // Simple O(n²) sort acceptable for MAX_SYSCALLS=1024
    for (int i = 0; i < stats->count - 1; i++) {
        for (int j = i+1; j < stats->count; j++) {
            // Swap if later entry has higher time
            if (stats->stats[j].time > stats->stats[i].time) {
                syscall_stat temp = stats->stats[i];
                stats->stats[i] = stats->stats[j];
                stats->stats[j] = temp;
            }
        }
    }

    // Determine number of results to display
    n = stats->count < n ? stats->count : n;
    for (int i = 0; i < n; i++) {
        double ratio = (stats->stats[i].time / stats->total_time) * 100;
        printf("%s (%d%%)\n", stats->stats[i].name, (int)ratio);
    }

    /* Null-terminate output for clean pipe communication
     * Some programs may read fixed buffer sizes - this ensures
     * we flush the pipe completely and avoid hanging readers */
    for (int i = 0; i < 80; i++) putchar('\0');
    fflush(stdout);
}

/* Main execution flow:
 * 1. Validate command line arguments
 * 2. Set up parent/child process communication
 * 3. Child: execute strace with command
 * 4. Parent: process and analyze strace output
 */
int main(int argc, char *argv[]) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s COMMAND [ARGS...]\n", argv[0]);
        fprintf(stderr, "Example: %s ls -l\n", argv[0]);
        return 1;
    }

    // Create pipe for inter-process communication
    int pipefd[2];
    if (pipe(pipefd) == -1) {
        perror("Failed to create pipe");
        return 1;
    }

    pid_t pid = fork();
    if (pid == -1) {
        perror("Process fork failed");
        return 1;
    }

    if (pid == 0) { // Child process - execute strace
        // Redirect stdout to pipe write end
        close(pipefd[0]);
        dup2(pipefd[1], STDOUT_FILENO);
        close(pipefd[1]);

        // Build strace argument array:
        // strace -T [user command] [args...]
        char* strace_argv[argc + 2];
        strace_argv[0] = "/usr/bin/strace";
        strace_argv[1] = "-T";  // Enable timing reports
        for (int i = 1; i < argc; i++) {
            strace_argv[i+1] = argv[i];
        }
        strace_argv[argc+1] = NULL;  // NULL-terminate array

        // Execute strace with clean environment
        execve("/usr/bin/strace", strace_argv, (char *[]){ "PATH=/bin:/usr/bin", NULL });
        perror("Failed to execute strace");
        return 1;
    } else { // Parent process - analyze output
        close(pipefd[1]);  // Close unused write end
        syscall_stats stats = {0};  // Initialize all fields to zero

        char buffer[1024];
        char* line_start = buffer;
        ssize_t bytes_read;

        // Read output in chunks until EOF
        while ((bytes_read = read(pipefd[0], 
               line_start, 
               sizeof(buffer) - (line_start - buffer))) > 0) {
            // Null-terminate the received data
            buffer[bytes_read + (line_start - buffer)] = '\0';
            line_start = buffer;

            // Process complete lines (delimited by newlines)
            char* line_end;
            while ((line_end = strchr(line_start, '\n')) != NULL) {
                *line_end = '\0';  // Temporarily terminate at newline

                char syscall_name[64];
                double time;
                if (parse_strace_line(line_start, syscall_name, &time)) {
                    add_syscall(&stats, syscall_name, time);
                }

                line_start = line_end + 1;  // Move to next line
            }

            // Handle partial line remaining in buffer
            if (line_start != buffer) {
                size_t remaining = buffer + sizeof(buffer) - line_start;
                // Move partial line to buffer start for next read
                memmove(buffer, line_start, remaining);
                line_start = buffer + remaining;
            }
        }

        // Cleanup and display results
        close(pipefd[0]);
        wait(NULL);  // Wait for strace to terminate completely
        print_top_syscalls(&stats, TOP_N);
    }

    return 0;
}

This is the command line to test:

root@DimLights:/mnt/f/WSL/Data/OperatingSystems/2025/os2025/sperf# gcc -g -o sperf sperf.c -lm -lrt
root@DimLights:/mnt/f/WSL/Data/OperatingSystems/2025/os2025/sperf# ./sperf echo "Hello WSL"

execve("/bin/echo", ["echo", "Hello WSL"], 0x7ffe6258ee20 /* 1 var */) = 0 <0.000363>
brk(NULL)                               = 0x55d4dee1c000 <0.000104>
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f483b684000 <0.000137>
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory) <0.000115>
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 <0.000133>
fstat(3, {st_mode=S_IFREG|0644, st_size=36191, ...}) = 0 <0.000115>
mmap(NULL, 36191, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f483b67b000 <0.000097>
close(3)                                = 0 <0.000079>
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 <0.000108>
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\220\243\2\0\0\0\0\0"..., 832) = 832 <0.000094>
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784 <0.000101>
fstat(3, {st_mode=S_IFREG|0755, st_size=2125328, ...}) = 0 <0.000096>
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784 <0.000088>
mmap(NULL, 2170256, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f483b469000 <0.000107>
mmap(0x7f483b491000, 1605632, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x28000) = 0x7f483b491000 <0.000117>
mmap(0x7f483b619000, 323584, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b0000) = 0x7f483b619000 <0.000099>
mmap(0x7f483b668000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1fe000) = 0x7f483b668000 <0.000102>
mmap(0x7f483b66e000, 52624, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f483b66e000 <0.000103>
close(3)                                = 0 <0.000104>
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f483b466000 <0.000085>
arch_prctl(ARCH_SET_FS, 0x7f483b466740) = 0 <0.000049>
set_tid_address(0x7f483b466a10)         = 1543 <0.000319>
set_robust_list(0x7f483b466a20, 24)     = 0 <0.000098>
rseq(0x7f483b467060, 0x20, 0, 0x53053053) = 0 <0.000123>
mprotect(0x7f483b668000, 16384, PROT_READ) = 0 <0.000096>
mprotect(0x55d4de99a000, 4096, PROT_READ) = 0 <0.000093>
mprotect(0x7f483b6bc000, 8192, PROT_READ) = 0 <0.000103>
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0 <0.000095>
munmap(0x7f483b67b000, 36191)           = 0 <0.000118>
getrandom("\xc0\x8d\xa3\xbc\x88\x59\xbb\xd3", 8, GRND_NONBLOCK) = 8 <0.000098>
brk(NULL)                               = 0x55d4dee1c000 <0.000113>
brk(0x55d4dee3d000)                     = 0x55d4dee3d000 <0.000097>
fstat(1, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0 <0.000092>
write(1, "Hello WSL\n", 10Received from pipe: Hello WSL
)             = 10 <0.000216>
close(1)                                = 0 <0.000165>
close(2)                                = 0 <0.000092>
exit_group(0)                           = ?
+++ exited with 0 +++
Total count is zero

Here's what I tried and what I expected:

I used GDB debugging and noticed that the

while ((line = strchr(line_start, '\n')) != NULL)

loop only ran once when testing the command. I can't figure out why the output shows:

+++ exited with 0 +++
Total count is zero

I've also asked AI for help and modified parts of the parse_strace_line function and the regular expression pattern, but with little success.

What actually happened: The statistics remain empty (Total count is zero), indicating the parsing or data extraction failed.

What I expected: The code should correctly parse system call timestamps from strace output, accumulate their times, and calculate the ratio of each call's time to the total.

As a student, I'd greatly appreciate it if you can point out the errors in my code and explain how to correctly read system call times and convert them into the ratio.

Solution

Properly capturing the trace output

Here's what I tried and what I expected: I used GDB debugging and noticed that the while ((line = strchr(line_start, '\n')) != NULL) loop only ran once when testing the command. I can't figure out why the output shows +++ exited with 0 +++ and Total count is zero.

Presumably, you also noticed that the strace output is being printed in your terminal. Which means it is going there instead of into the pipe. This is because strace sends its output to stderr, but you are redirecting (only) stdout.

stderr is the conventional place for diagnostic output, which is a good characterization of strace's own whole output. Putting such output there allows you to discriminate between the regular output of the program being traced and strace's diagnostics. If the program being traced writes anything to its standard error, however, then you need other measures to separate that from the strace output. In any case, your parent process never reads any of that. Whether it reads anything at all will depend on the behavior of the process being traced.

Plausible fix:

        dup2(pipefd[1], STDOUT_FILENO);

-->

        dup2(pipefd[1], STDERR_FILENO);

Regex details

Once you get past that, you will find that the regex with which you are trying to parse the strace output does not behave as you intend. It is important to understand here that there is not just one regex language, but rather a whole family of them. Some are more compatible with each other than others. Some have more features than others. You are using the POSIX regcomp() and regexec() functions, which implement the POSIX basic- and extended regular expression dialects, and in particular, you are using the ERE language.

The POSIX regex languages are among the oldest still in common use. They lack a variety features that you can find in many other regex dialects. In particular,

although they use backslash (\) as an escape character, this is strictly for suppressing the meaning of special characters. BRE and ERE do not recognize escape codes representing character classes, such as \s or \d.
BRE and ERE do not have non-greedy repetition operators, such as *?

But these features are easily enough replaced in your particular regex. Starting with ...

"^(\\w+)\\(.*?\\)\\s+=\\s+.*?<(\\d+\\.?\\d*)>$"

... and using the ERE flavor, we can first observe that a character class of the form [[:space:]] matches all characters for which isspace() would return a truthy result. This is very similar to, if not identical to, what you presumably expect of \s. Making that substitution give us:

"^(\\w+)\\(.*?\\)[[:space:]]+=[[:space:]]+.*?<(\\d+\\.?\\d*)>$"

Similarly, [[:digit:]] is a suitable ERE spelling of \d, or you could use [0-9], too. Now we have:

"^(\\w+)\\(.*?\\)[[:space:]]+=[[:space:]]+.*?<([[:digit:]]\\.?[[:digit:]]*)>$"

The \w does not have quite so simple an analog, but this will do: [[:alnum:]_]. That matches a single alphanumeric character or an underscore (_), which is probably what you mean by \w, modulo locale-related considerations. Now we have:

"^([[:alnum:]_]+)\\(.*?\\)[[:space:]]+=[[:space:]]+.*?<([[:digit:]]\\.?[[:digit:]]*)>$"

Finally, you have two uses of .*?. These are not easy to replace in general, but for your particular use, sub-expressions involving negated character classes can provide adequate substitutes:

"^([[:alnum:]_]+)\\([^)]*\\)[[:space:]]+=[[:space:]]+[^<]*<([[:digit:]]\\.?[[:digit:]]*)>$"

That's now a C string literal representing a valid POSIX extended regular expression with the meaning you seem to have intended. However, as a matter of personal preference, I tend to prefer using character classes rather than backslashes to suppress the special meaning of most regex characters, especially in contexts such as string literals where the backslashes need to be escaped themselves. Applying that principle gets us to this, which I find slightly more readable:

"^([[:alnum:]_]+)[(][^)]*[)][[:space:]]+=[[:space:]]+[^<]*<([[:digit:]][.]?[[:digit:]]*)>$"

Enough?

Somewhat to my surprise, given the complexity of the program, fixing the redirection and the regex seems to be enough to make the program read and successfully process the trace output:

$ ./regtrace ls
[...]
mmap (37%)
mprotect (16%)
close (10%)
openat (10%)
read (9%)
$

The program might have other issues, but this is where I stop.