c environment-variables variable-substitution envsubst

Environment variables substitution in C


I have a string with the following content (UTF-8):

__$FOO ${FOO} ${FOO:def} ${FOO2:-тест}

And the environment variable FOO has the value test. My C application should work like GNU envsubst: replace every $FOO or ${FOO} occurrence with test - nothing complicated. Expected result:

__test test test тест

But how can I do this using only C? I can't use something like exec or external (dynamic) libraries (my app is statically linked for use in a Docker scratch image).

I know about envsubst from gettext, but at a minimum it does not support default values.

I found libraries with all the required features in Go (stephenc/envsub) and Rust (stephenc/envsub), but does anyone know how I can do the same in C? I don't want to reinvent something that has probably already been invented.

// Placeholder from the question: the desired entry point that takes a string
// and returns it with the environment variables substituted.
static char *envsubst(char *str) {
  // magic (body intentionally left unimplemented in the question)
}

Solution

  • Since I could not find an answer, I decided to write my own parser. It has less functionality than I was looking for, but that was enough for my case:

    #include <stdlib.h>
    #include <string.h>
    #include <stdbool.h>
    #include "envsubst.h"
    
    enum {
        BUF_GROW_STEP = 64, // bytes added each time a buffer runs out of room
        ENV_BUF_CAP = 32    // initial capacity of the name / default scratch buffers
    };

    /**
     * Growable byte buffer that is kept NUL-terminated after every operation.
     */
    struct buffer {
        char *data; // heap storage; data[len] is always '\0'
        size_t len; // bytes written so far, terminator not counted
        size_t cap; // allocated size of data, in bytes
    };

    /**
     * Allocate an empty buffer with room for at least `cap` bytes.
     *
     * A capacity of 0 is bumped to 1 so the terminator always fits.
     * Returns NULL if either allocation fails.
     */
    static struct buffer *newBuf(size_t cap) {
        struct buffer *b = malloc(sizeof *b);

        if (b == NULL) {
            return NULL;
        }

        if (cap == 0) {
            cap = 1;
        }

        b->data = calloc(cap, sizeof *b->data);
        if (b->data == NULL) {
            free(b);
            return NULL;
        }

        b->len = 0; // was left uninitialised before, yet read by writeInBuf
        b->cap = cap;

        return b;
    }

    /**
     * Release a buffer and its storage. Accepts NULL.
     */
    static void freeBuf(struct buffer *buf) {
        if (buf != NULL) {
            free(buf->data);
            free(buf);
        }
    }

    /**
     * Reset the buffer to the empty string without releasing its storage.
     */
    static void emptyBuf(struct buffer *buf) {
        buf->len = 0;
        buf->data[0] = '\0';
    }

    /**
     * Append one byte, growing the storage by BUF_GROW_STEP when needed.
     *
     * Returns false (leaving the buffer untouched and still valid) when the
     * storage cannot be grown; the byte is never written out of bounds.
     */
    static bool writeInBuf(struct buffer *buf, const char c) {
        // one byte for `c` plus one for the terminator must fit
        if (buf->len + 1 >= buf->cap) {
            size_t newCap = buf->cap + BUF_GROW_STEP;
            char *grown = realloc(buf->data, newCap);

            if (grown == NULL) {
                return false; // buf->data is still the old, valid block
            }

            buf->data = grown;
            buf->cap = newCap;
        }

        buf->data[buf->len++] = c;
        buf->data[buf->len] = '\0';

        return true;
    }

    /**
     * Append a NUL-terminated string. Returns false as soon as one byte
     * cannot be stored.
     */
    static bool writeStringInBuf(struct buffer *buf, const char *str) {
        for (size_t j = 0; str[j] != '\0'; j++) {
            if (!writeInBuf(buf, str[j])) {
                return false;
            }
        }

        return true;
    }

    /**
     * Parse the string and replace patterns in format `${ENV_NAME:-default_value}` with
     * the values from the environment (or default values after `:-` if provided).
     *
     * Notes on the supported syntax:
     *  - only the braced form is expanded; a bare `$NAME` is copied as is;
     *  - the default is used only when the variable is not set at all (a variable
     *    that is set to an empty string expands to the empty string);
     *  - an unset variable without a default expands to nothing;
     *  - a `${` that is never closed swallows the rest of the input;
     *  - inputs shorter than 4 bytes are copied verbatim.
     *
     * Returns a newly allocated NUL-terminated string that the caller must free(),
     * for every input length. Returns NULL when `str` is NULL or when memory
     * cannot be allocated.
     */
    char *envsubst(const char *str) {
        if (str == NULL) {
            return NULL;
        }

        const size_t strLen = strlen(str);

        if (strLen < 4) {
            // Hand back a heap copy (not the caller's own pointer) so that the
            // result can always be released with free().
            char *copy = malloc(strLen + 1);

            if (copy != NULL) {
                memcpy(copy, str, strLen + 1);
            }

            return copy;
        }

        struct buffer *result = newBuf(strLen + 1);
        struct buffer *envName = newBuf(ENV_BUF_CAP);
        struct buffer *envDef = newBuf(ENV_BUF_CAP);
        bool ok = result != NULL && envName != NULL && envDef != NULL;

        enum {
            DATA,
            ENV_NAME,
            ENV_DEFAULT
        } state = DATA;
        unsigned int nested = 0; // number of `${` seen and not yet closed

        for (size_t i = 0; ok && str[i] != '\0'; i++) {
            const char c = str[i];

            if (c == '$' && str[i + 1] == '{') {
                // `${` always (re)starts name collection, even inside a default
                i++;
                nested++;
                state = ENV_NAME;
                continue;
            }

            if (state == ENV_NAME && c == ':' && str[i + 1] == '-') {
                i++;
                state = ENV_DEFAULT;
                continue;
            }

            if (c == '}' && state != DATA) {
                nested--;

                if (nested == 0) {
                    // outermost brace closed: emit the value or the default
                    const char *envVar = getenv(envName->data);

                    if (envVar != NULL) {
                        ok = writeStringInBuf(result, envVar);
                    } else if (envDef->len > 0) {
                        ok = writeStringInBuf(result, envDef->data);
                    }

                    emptyBuf(envName);
                    emptyBuf(envDef);
                    state = DATA;
                    continue;
                }
                // an inner `}` is kept as a literal byte of the current part
            }

            switch (state) {
                case ENV_NAME:
                    ok = writeInBuf(envName, c);
                    break;

                case ENV_DEFAULT:
                    ok = writeInBuf(envDef, c);
                    break;

                case DATA:
                default:
                    ok = writeInBuf(result, c);
                    break;
            }
        }

        char *data = NULL;

        if (ok) {
            // hand the storage over to the caller, keep only the wrapper to free
            data = result->data;
            result->data = NULL;
        }

        freeBuf(envName);
        freeBuf(envDef);
        freeBuf(result);

        return data;
    }
    

    And the tests:

    #include <assert.h>
    
    // Self-checks for envsubst(); build and run with:
    // `gcc -o ./tmp/subs ./src/envsubst.c && ./tmp/subs`
    int main() {
        // Each entry pairs an input template with the exact expansion expected
        // once Test_1 and __#Test_2 have been exported below.
        static const struct {
            const char *input;
            const char *expected;
        } cases[] = {
            {
                "__$_UNSET_VAR_ ${_UNSET_VAR_} ${_UNSET_VAR_:-default value 😎}",
                "__$_UNSET_VAR_  default value 😎"
            },
            {
                "${__#Test_2} ${__#Test_2:-foo}${_UNSET_VAR_:-def}${__#Test_2}",
                "😎 😎def😎"
            },
            {
                "${Test_1} ${Test_1:-def}${Test_1}",
                "foo foofoo"
            },
            {
                "__$FOO ${bar} $FOO:def ${Test_1:-def} ${Test_1} ${_UNSET_VAR_:-default} bla-bla ${FOO2:-тест}${ABC} ${}${}",
                "__$FOO  $FOO:def foo foo default bla-bla тест "
            },
            {
                "${_UNSET_VAR_:-${Test_1}}",
                ""
            },
            {
                "aaa ${}} ${${} bbb",
                "aaa } "
            },
        };

        putenv("Test_1=foo");
        putenv("__#Test_2=😎");

        // Walk the table in order; the first mismatch aborts through assert().
        for (unsigned int k = 0; k < sizeof cases / sizeof cases[0]; k++) {
            assert(strcmp(envsubst(cases[k].input), cases[k].expected) == 0);
        }
    }