cgcccl

I found a base64 decoder written in C. It works correctly when I compile it with GCC. But it went wrong with cl.exe. Why?


/**
 * Copyright (c) 2006-2018 Apple Inc. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 **/

#include "base64.h"

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

 // base64 tables
static const char basis_64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
static const signed char index_64[128] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62,
    -1, -1, -1, 63, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0,
    1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
    23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
    39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1 };
#define CHAR64(c) (((c) < 0 || (c) > 127) ? -1 : index_64[(c)])

// base64_encode    :    base64 encode
//
// value            :    data to encode
// vlen             :    length of data
// (result)         :    new char[] - c-str of result
char* base64_encode(const unsigned char* value, size_t vlen) {
    char* result = (char*)malloc((vlen * 4) / 3 + 5);
    if (result == NULL) {
        return NULL;
    }
    char* out = result;
    while (vlen >= 3) {
        *out++ = basis_64[value[0] >> 2];
        *out++ = basis_64[((value[0] << 4) & 0x30) | (value[1] >> 4)];
        *out++ = basis_64[((value[1] << 2) & 0x3C) | (value[2] >> 6)];
        *out++ = basis_64[value[2] & 0x3F];
        value += 3;
        vlen -= 3;
    }
    if (vlen > 0) {
        *out++ = basis_64[value[0] >> 2];
        unsigned char oval = (value[0] << 4) & 0x30;
        if (vlen > 1)
            oval |= value[1] >> 4;
        *out++ = basis_64[oval];
        *out++ = (vlen < 2) ? '=' : basis_64[(value[1] << 2) & 0x3C];
        *out++ = '=';
    }
    *out = '\0';

    return result;
}

// base64_decode    :    base64 decode
//
// value            :    c-str to decode
// rlen             :    length of decoded result
// (result)         :    new unsigned char[] - decoded result
unsigned char* base64_decode(const char* value, size_t* rlen) {
    *rlen = 0;
    int c1, c2, c3, c4;

    size_t vlen = strlen(value);
    unsigned char* result = (unsigned char*)malloc((vlen * 3) / 4 + 1);
    if (result == NULL) {
        return NULL;
    }
    unsigned char* out = result;

    while (1) {
        if (value[0] == 0) {
            //*out = '\0';
            return result;
        }
        c1 = value[0];
        if (CHAR64(c1) == -1) {
            goto base64_decode_error;
            ;
        }
        c2 = value[1];
        if (CHAR64(c2) == -1) {
            goto base64_decode_error;
            ;
        }
        c3 = value[2];
        if ((c3 != '=') && (CHAR64(c3) == -1)) {
            goto base64_decode_error;
            ;
        }
        c4 = value[3];
        if ((c4 != '=') && (CHAR64(c4) == -1)) {
            goto base64_decode_error;
            ;
        }

        value += 4;
        *out++ = (CHAR64(c1) << 2) | (CHAR64(c2) >> 4);
        *rlen += 1;

        if (c3 != '=') {
            *out++ = ((CHAR64(c2) << 4) & 0xf0) | (CHAR64(c3) >> 2);
            *rlen += 1;

            if (c4 != '=') {
                *out++ = ((CHAR64(c3) << 6) & 0xc0) | CHAR64(c4);
                *rlen += 1;
            }
        }
    }

base64_decode_error:
    *result = 0;
    *rlen = 0;

    return result;
}
// Below is my test.
int main() {
    char str[] = "123456";
    char* encoded = base64_encode(str, strlen(str));
    printf("encode : %s\n", encoded);
    int rlen = 3;
    char* decoded = base64_decode(encoded, &rlen);
    printf("decode : %s\n", decoded);
    printf("len: %d\n", strlen(decoded));
    return 0;
}

Result (cl.exe) : encode : MTIzNDU2 decode : 123456? len: 7

Then I thought these code may work in unix, and the compiler may be cc(gcc). So I compiled these code in my WSL with gcc, and it worked correctly.

Result (gcc) : encode : MTIzNDU2 decode : 123456 len: 6

Why gcc is correct but cl is not?

What's the difference between these two compiler in this case?


Solution

  • There's a reason that base64_decode returns a length to you. If (but only if) you know that the encoded text had been human-readable, you could print it back out like this:

    int rlen;
    char* decoded = base64_decode(encoded, &rlen);
    printf("decode : %.*s\n", rlen, decoded);
    printf("len: %d\n", rlen);
    

    %.*s tells printf that the length of the string you're printing is given, not by the usual \0 terminator in the string, but rather, by an explicit length you pass in.

    As mentioned in the comments, Base-64 encoding is usually used to encode arbitrary binary data, which might contain embedded \0 characters, and which typically isn't null-terminated. For arbitrary binary data, you usually want to carry the length around as a separate variable, like rlen here.

    Another issue is that this base64_decode function is returning the length as a size_t value, not int. So you really need:

    size_t rlen;
    char* decoded = base64_decode(encoded, &rlen);
    printf("decode : %.*s\n", (int)rlen, decoded);
    printf("len: %zd\n", rlen);