I have a string with the following content (UTF-8):
__$FOO ${FOO} ${FOO:def} ${FOO2:-тест}
And environment variable FOO
with a value test
. My C application should work like GNU envsubst
- replace all $FOO
or ${FOO}
entries with a test
- nothing complicated. Expected result:
__test test test тест
But... How can I do this using C only? I can't use something like exec
or external (dynamic) libraries (my app is statically linked so that it can run in a Docker scratch image).
I know about envsubst from gettext, but, at a minimum, it does not support default values.
I found libraries with all the required features in Go - stephenc/envsub and Rust - stephenc/envsub, but does anyone know how I can do the same in C? I don't want to reinvent something that has probably already been written.
/* Placeholder from the question: the function being asked for, which should
 * return `str` with its $FOO / ${FOO} references replaced by the values of
 * the corresponding environment variables. The body is intentionally missing. */
static char *envsubst(char *str) {
// magic
}
Since I could not find an answer, I decided to write my own parser. It has less functionality than I was looking for, but that was enough for my case:
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include "envsubst.h"
/* Growable character buffer used while assembling strings byte by byte. */
struct buffer {
    char *data;        /* heap storage holding the bytes written so far */
    unsigned int len;  /* number of bytes currently stored in `data` */
    unsigned int cap;  /* number of bytes allocated for `data` */
};
/**
 * Allocate a new, empty buffer with room for `cap` bytes.
 *
 * The storage is zero-filled, so the contents read as a NUL-terminated string
 * for as long as fewer than `cap` bytes have been written.
 *
 * Returns NULL when either allocation fails. The caller owns the result and
 * must free both `data` and the struct itself.
 */
static struct buffer *newBuf(unsigned int cap) {
    /* always reserve at least one byte so the result does not depend on
     * what the allocator does for a zero-sized request */
    const unsigned int size = cap > 0 ? cap : 1;

    struct buffer *b = malloc(sizeof *b);
    if (b == NULL) {
        return NULL;
    }

    b->data = calloc(size, sizeof *b->data); /* calloc zero-fills the storage */
    if (b->data == NULL) {
        free(b);
        return NULL;
    }

    b->len = 0; /* previously left uninitialised although every writer reads it */
    b->cap = size;
    return b;
}
/**
 * Reset a buffer to the empty state and wipe its whole storage back to NUL
 * bytes. A buffer that has nothing stored is left untouched.
 */
static void emptyBuf(struct buffer *buf) {
    if (buf->len == 0) {
        return;
    }

    buf->len = 0;
    memset(buf->data, 0, buf->cap);
}
/**
 * Append the single byte `c` to the buffer, growing the storage in 64-byte
 * steps when needed.
 *
 * One spare, zeroed byte is always kept after the stored text so that `data`
 * can be read as a NUL-terminated string. If the buffer cannot be grown
 * (allocation failure or capacity overflow) the byte is dropped and the
 * buffer is left unchanged, rather than writing past the allocation.
 */
static void writeInBuf(struct buffer *buf, const char c) {
    if (buf->cap <= buf->len + 1) {
        const unsigned int newCap = buf->cap + 64; /* growing size */
        if (newCap < buf->cap) {
            return; /* unsigned wrap-around: cannot grow any further */
        }

        /* keep the old pointer until realloc is known to have succeeded */
        char *grown = realloc(buf->data, newCap);
        if (grown == NULL) {
            return; /* the old block is still valid; drop the byte instead of overflowing */
        }

        /* zero the freshly added tail to preserve the NUL-termination invariant */
        memset(grown + buf->cap, '\0', newCap - buf->cap);
        buf->data = grown;
        buf->cap = newCap;
    }

    buf->data[buf->len++] = c;
}
/**
 * Append every character of the NUL-terminated string `str` to the buffer
 * (the terminator itself is not written), one writeInBuf call per character.
 */
static void writeStringInBuf(struct buffer *buf, const char *str) {
    for (const char *cursor = str; *cursor != '\0'; cursor++) {
        writeInBuf(buf, *cursor);
    }
}
/**
 * Expand `${ENV_NAME}` and `${ENV_NAME:-default_value}` references in `str`
 * using the process environment.
 *
 * A reference is replaced by the value of the named variable; when the
 * variable is not set, the text after `:-` is used instead (or nothing when no
 * default was given). Everything else, including the bare `$NAME` form, is
 * copied verbatim.
 *
 * Limitations:
 *  - nested references are not expanded: the text of an inner `${...}` is
 *    treated as part of the enclosing reference's variable name;
 *  - text following an unterminated `${` is dropped;
 *  - inputs shorter than four characters are returned as an unmodified copy.
 *
 * Returns a heap-allocated, NUL-terminated string that the caller must
 * free(). Returns NULL when `str` is NULL, when the copy of a short input
 * cannot be allocated, or when newBuf returns NULL.
 */
char *envsubst(const char *str) {
    if (str == NULL) {
        return NULL;
    }

    const size_t strLen = strlen(str);
    if (strLen < 4) {
        /* hand back a copy (not the caller's own pointer) so that every
         * result of this function can be released the same way */
        char *copy = malloc(strLen + 1);
        if (copy != NULL) {
            memcpy(copy, str, strLen + 1);
        }
        return copy;
    }

    enum {
        DATA,
        ENV_NAME,
        ENV_DEFAULT,
    } state = DATA;
    unsigned int nested = 0; /* number of `${` seen and not yet closed */
    char *out = NULL;

    /* the input length is only an initial capacity hint; writeInBuf grows on demand */
    struct buffer *result = newBuf((unsigned int) strLen);
    struct buffer *envName = newBuf(32);
    struct buffer *envDef = newBuf(32);
    if (result == NULL || envName == NULL || envDef == NULL) {
        goto cleanup;
    }

    for (size_t i = 0; str[i] != '\0'; i++) {
        const char c = str[i];

        /* `${` opens a reference in any state; str[i + 1] is at worst the terminator */
        if (c == '$' && str[i + 1] == '{') {
            i++;
            nested++;
            state = ENV_NAME;
            continue;
        }

        /* `:-` switches from the name to the default value */
        if (state == ENV_NAME && c == ':' && str[i + 1] == '-') {
            i++;
            state = ENV_DEFAULT;
            continue;
        }

        if (c == '}' && state != DATA) {
            nested--;
            if (nested == 0) {
                /* outermost reference closed: emit the value or the default.
                 * The lookup happens only here, not on every literal character. */
                const char *value = getenv(envName->data);
                if (value != NULL) {
                    writeStringInBuf(result, value);
                } else if (envDef->len > 0) {
                    writeStringInBuf(result, envDef->data);
                }
                emptyBuf(envName);
                emptyBuf(envDef);
                state = DATA;
                continue;
            }
            /* an inner `}` is kept as part of the text being collected */
        }

        switch (state) {
            case ENV_NAME:
                writeInBuf(envName, c);
                break;
            case ENV_DEFAULT:
                writeInBuf(envDef, c);
                break;
            case DATA:
            default:
                writeInBuf(result, c);
                break;
        }
    }

    /* transfer ownership of the assembled string to the caller */
    out = result->data;
    result->data = NULL;

cleanup:
    if (envName != NULL) {
        free(envName->data);
        free(envName);
    }
    if (envDef != NULL) {
        free(envDef->data);
        free(envDef);
    }
    if (result != NULL) {
        free(result->data);
        free(result);
    }
    return out;
}
And the tests:
#include <assert.h>
// tests running: `gcc -o ./tmp/subs ./src/envsubst.c && ./tmp/subs`

/* Expand `input`, check the result against `expected`, then release it. */
static void expectSubst(const char *input, const char *expected) {
    char *actual = envsubst(input);
    assert(actual != NULL);
    assert(strcmp(actual, expected) == 0);
    /* never free the caller's own string, should the input be handed back as-is */
    if (actual != input) {
        free(actual);
    }
}

/* Runs the envsubst test cases; an assertion failure aborts the program. */
int main(void) {
    putenv("Test_1=foo");
    putenv("__#Test_2=😎");

    /* two spaces in the expected text: the unset ${_UNSET_VAR_} between them expands to nothing */
    expectSubst(
        "__$_UNSET_VAR_ ${_UNSET_VAR_} ${_UNSET_VAR_:-default value 😎}",
        "__$_UNSET_VAR_  default value 😎"
    );
    expectSubst(
        "${__#Test_2} ${__#Test_2:-foo}${_UNSET_VAR_:-def}${__#Test_2}", "😎 😎def😎"
    );
    expectSubst(
        "${Test_1} ${Test_1:-def}${Test_1}", "foo foofoo"
    );
    /* two spaces after `__$FOO`: the unset ${bar} between them expands to nothing */
    expectSubst(
        "__$FOO ${bar} $FOO:def ${Test_1:-def} ${Test_1} ${_UNSET_VAR_:-default} bla-bla ${FOO2:-тест}${ABC} ${}${}",
        "__$FOO  $FOO:def foo foo default bla-bla тест "
    );
    /* nested references are not expanded */
    expectSubst(
        "${_UNSET_VAR_:-${Test_1}}", ""
    );
    /* text after an unterminated `${` is dropped */
    expectSubst(
        "aaa ${}} ${${} bbb", "aaa } "
    );

    return 0;
}