c iconv libiconv

iconv: Invalid argument error when passing struct element


I have a function which converts a UTF-16 string to UTF-8 using the iconv library (ignore the "ISO-8859-1" part; all my test files are UTF-8 or UTF-16). Here is its code:

/* Leave curses mode, then report `what` using `err`, the errno value captured
 * right after the failing call (endwin() is allowed to overwrite errno), and
 * terminate the process with status 1. */
static void to_utf8_die(const char *what, int err){
    endwin();
    errno = err;
    perror(what);
    exit(1);
}

/**
 * Convert a raw text buffer to a newly allocated, NUL-terminated UTF-8 string.
 *
 * @param encoding    Pointer to the source-encoding selector: 0 selects
 *                    ISO-8859-1, any other value selects UTF-16 (little-endian
 *                    unless the data starts with a big-endian BOM, FE FF).
 * @param source_str  Heap-allocated input bytes (need not be NUL-terminated).
 *                    Ownership passes to this function: the buffer is freed
 *                    before a successful return.
 * @param source_size Number of input bytes, including a leading BOM if any.
 * @return Heap buffer with the UTF-8 text; the caller must free() it.
 *
 * On any failure the function leaves curses mode, prints the error with
 * perror() and exits with status 1, so it never returns NULL.
 */
char* to_utf8(const unsigned int *encoding, char *source_str, unsigned int source_size){
    const char *from_code = "ISO-8859-1";
    size_t offset = 0;

    // A BOM only has meaning for UTF-16; in ISO-8859-1 the bytes FF FE are text.
    if (*encoding != 0) {
        from_code = "UTF-16LE";
        if (source_size >= 2) {
            unsigned char b0 = (unsigned char)source_str[0];
            unsigned char b1 = (unsigned char)source_str[1];
            if (b0 == 0xFE && b1 == 0xFF) {
                // Big-endian BOM: skip it and decode with matching byte order.
                from_code = "UTF-16BE";
                offset = 2;
            } else if (b0 == 0xFF && b1 == 0xFE) {
                offset = 2;
            }
        }
    }

    iconv_t cd = iconv_open("UTF-8", from_code);
    if (cd == (iconv_t)-1) {to_utf8_die("iconv_open", errno); }

    // Worst-case growth: ISO-8859-1 needs up to 2 UTF-8 bytes per input byte,
    // UTF-16 up to 3 bytes per 2-byte unit, so 2x the input always suffices.
    size_t in_left = (size_t)source_size - offset;
    size_t out_left = in_left * 2;

    char *output = malloc(out_left + 1);   // +1 for the terminating NUL
    if (output == NULL) {
        int err = errno;
        iconv_close(cd);
        to_utf8_die("malloc", err);
    }

    char *inbuf = source_str + offset;
    char *outbuf = output;

    if (iconv(cd, &inbuf, &in_left, &outbuf, &out_left) == (size_t)-1) {
        // EINVAL here means the input ends in an incomplete multibyte
        // sequence, e.g. an odd byte count for UTF-16.
        int err = errno;
        free(output);
        iconv_close(cd);
        to_utf8_die("iconv", err);
    }

    // iconv advanced outbuf past the last converted byte.
    *outbuf = '\0';

    // Do not leak memory
    free(source_str);
    iconv_close(cd);

    return output;
}

Here is the part of the code where the track data is declared and the function to_utf8 is called:

//...
// Allocate one zero-initialised Track.
// NOTE(review): calloc takes (count, size); the arguments are swapped here,
// which still allocates the same number of bytes.
Track *track_data = calloc(sizeof(*track_data), 1);
//...
// "TIT2" tag: copy its content into a fresh buffer owned by track_data->title.
if (strcmp(tag_str, "TIT2") == 0){
                // Zeroed buffer of tag_size bytes; only tag_size-1 bytes are
                // copied in, so the last byte of the buffer stays 0.
                track_data->title = calloc(sizeof(char), tag_size);
                memcpy(track_data->title, &id3_metadata_str[offset], tag_size-1);
                // Fix encoding if needed
                // to_utf8 frees the buffer passed in and returns a new one.
                // NOTE(review): tag_size is passed as the input length although
                // only tag_size-1 bytes were copied; for UTF-16 the extra byte
                // makes the length odd, which iconv reports as "Invalid
                // argument" - presumably tag_size-1 is intended; confirm.
                if (encoding != 3){track_data->title = to_utf8(&encoding, track_data->title, tag_size); };

Here is the Track struct declaration:

/* Per-track record. Field meanings below are inferred from the names unless
 * stated otherwise - confirm against the code that fills them. */
typedef struct Track {
    char   *path;         /* presumably the file path of the track */
    char   *artist;       /* presumably the artist name */
    char   *album;        /* presumably the album name */
    char   *title;        /* title text; filled from the "TIT2" tag */
    char   *year;         /* presumably the release year, kept as text */
    char   *track;        /* presumably the track number, kept as text */
    double  duration;     /* presumably the playing time; unit not shown here */
    char   *dur_str;      /* presumably a printable form of duration */
    int     lyrics_size;  /* presumably the size of the lyrics buffer */
    char   *lyrics;       /* presumably the lyrics text */
    int     shfl_num;     /* presumably the position in a shuffle order */
    long    progress;     /* presumably the playback progress; unit not shown */
} Track;

This code above results in error "iconv: Invalid argument".

If I run the same code with a manually created string, everything works fine. Here is the code:

#include <errno.h>
#include <iconv.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/**
 * Convert a raw text buffer to a newly allocated, NUL-terminated UTF-8 string.
 *
 * @param encoding    Pointer to the source-encoding selector: 0 selects
 *                    ISO-8859-1, any other value selects UTF-16 (little-endian
 *                    unless the data starts with a big-endian BOM, FE FF).
 * @param source_str  Input bytes (need not be NUL-terminated). The buffer is
 *                    neither modified nor freed; the caller keeps ownership.
 * @param source_size Number of input bytes, including a leading BOM if any.
 * @return Heap buffer with the UTF-8 text; the caller must free() it.
 *
 * On any failure the error is printed with perror() and the process exits
 * with status 1, so the function never returns NULL.
 */
char* to_utf8(const unsigned int *encoding, char *source_str, unsigned int source_size){
    const char *from_code = "ISO-8859-1";
    size_t offset = 0;

    // A BOM only has meaning for UTF-16; in ISO-8859-1 the bytes FF FE are text.
    if (*encoding != 0) {
        from_code = "UTF-16LE";
        if (source_size >= 2) {
            unsigned char b0 = (unsigned char)source_str[0];
            unsigned char b1 = (unsigned char)source_str[1];
            if (b0 == 0xFE && b1 == 0xFF) {
                // Big-endian BOM: skip it and decode with matching byte order.
                from_code = "UTF-16BE";
                offset = 2;
            } else if (b0 == 0xFF && b1 == 0xFE) {
                offset = 2;
            }
        }
    }

    iconv_t cd = iconv_open("UTF-8", from_code);
    if (cd == (iconv_t)-1) {perror("iconv_open"); exit(1); }

    // Worst-case growth: ISO-8859-1 needs up to 2 UTF-8 bytes per input byte,
    // UTF-16 up to 3 bytes per 2-byte unit, so 2x the input always suffices.
    size_t in_left = (size_t)source_size - offset;
    size_t out_left = in_left * 2;

    char *output = malloc(out_left + 1);   // +1 for the terminating NUL
    if (output == NULL) {
        perror("malloc");
        iconv_close(cd);
        exit(1);
    }

    char *inbuf = source_str + offset;
    char *outbuf = output;

    if (iconv(cd, &inbuf, &in_left, &outbuf, &out_left) == (size_t)-1) {
        // EINVAL here means the input ends in an incomplete multibyte
        // sequence, e.g. an odd byte count for UTF-16.
        perror("iconv");
        free(output);
        iconv_close(cd);
        exit(1);
    }

    // iconv advanced outbuf past the last converted byte.
    *outbuf = '\0';

    // Do not leak memory
    iconv_close(cd);

    return output;
}

/* Demo driver: converts a hard-coded UTF-16LE sample (with BOM) to UTF-8 via
 * to_utf8() and prints the result. Returns 0. */
int main(void) {
    unsigned int encoding = 1; // Assume UTF-16 with BOM
    // "Bad Day for My Enemies" as UTF-16LE with BOM. Stored as unsigned char
    // because 0xFF and 0xFE do not fit in a signed char.
    unsigned char content_str[] = {0xFF, 0xFE, 0x42, 0x00, 0x61, 0x00, 0x64, 0x00, 0x20, 0x00, 0x44, 0x00, 0x61, 0x00, 0x79,
                                   0x00, 0x20, 0x00, 0x66, 0x00, 0x6F, 0x00, 0x72, 0x00, 0x20, 0x00, 0x4D, 0x00, 0x79, 0x00,
                                   0x20, 0x00, 0x45, 0x00, 0x6E, 0x00, 0x65, 0x00, 0x6D, 0x00, 0x69, 0x00, 0x65, 0x00, 0x73, 0x00 }; // Example UTF-16LE with BOM
    unsigned int tag_size = sizeof(content_str);

    char* utf8_str = to_utf8(&encoding, (char *)content_str, tag_size);
    if (utf8_str != NULL) {
        // NOTE(review): "%s" requires to_utf8() to return a NUL-terminated
        // buffer - confirm that it does.
        printf("Converted UTF-8 string: %s\n", utf8_str);
        free(utf8_str);
    } else {
        printf("Conversion failed.\n");
    }

    return 0;
}

The data in both cases is the same. Why do I get an "Invalid argument" error when passing track_data->title, but not when passing char content_str[]?


Solution

  • Thanks to @IanAbbott I found where the problem is (+ learned about errno).

    The problem was the source_size value, which is what in_str_size is set to. The string held the content of the ID3v2 tag, which consists of an encoding byte, 2 bytes of BOM and, in this case, the title of the track. source_size is the size of all of this in bytes. I subtracted 2 bytes from source_size for the BOM but didn't subtract 1 byte for the encoding byte. After I subtracted 1 more from source_size, everything started to work.

    So, "Invalid argument" (errno EINVAL) means that "An incomplete multibyte sequence is encountered in the input". I'll also add that "Argument list too long" (errno E2BIG) means that "Output buffer has no more room for the next converted character".

    I love C. Pay attention, or you'll waste half a day on a stupid error. At least I've learned something new.