cavro

How to use an array as the value of a map in avro-c


How can I have a map with array values?

I use avro 1.7.7 and my schema is like this:

{
    "type": "map",
    "values": {
        "type": "array",
        "items": "int"
    }
}

My program is like this:

main.c

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <avro.h>

/*
 * Convenience wrappers around sout(): each prints one line on stdout.
 *   iout/cout/lout     - a bare int / char / long
 *   piout/psout/pcout/plout - "<label>:<value>" for int / string / char / long
 *
 * Each macro expands to a single expression (no trailing semicolon), so
 * `iout(x);` is one statement and is safe in an unbraced if/else.
 */
#define iout(i)        sout("%d", (i))
#define cout(c)        sout("%c", (c))
#define lout(l)        sout("%ld", (l))
#define piout(str, i)  sout("%s:%d", (str), (i))
#define psout(str, s)  sout("%s:%s", (str), (s))
#define pcout(str, c)  sout("%s:%c", (str), (c))
/* "%l" is not a valid conversion; a long needs "%ld". Separator matches the other p*out macros. */
#define plout(str, l)  sout("%s:%ld", (str), (l))

//for output

/* Print a printf-style formatted message on stdout, followed by a newline. */
void sout(const char *format, ...) {
    va_list ap;

    va_start(ap, format);
    vprintf(format, ap);
    va_end(ap);

    putchar('\n');
}

int main() {
    int res, i, j, k;

    avro_schema_t schema;
    avro_datum_t rec;

    //schema
    avro_schema_t int_array_schema = avro_schema_array(avro_schema_int());
    avro_schema_t int_map_schema = avro_schema_map(int_array_schema);
    avro_schema_t int_union_schema = avro_schema_union();
    avro_schema_union_append(int_union_schema, int_map_schema);
    avro_schema_union_append(int_union_schema, avro_schema_null());

    //choose schema
    //    schema = int_array_schema;
    schema = int_map_schema;
    //    schema = int_union_schema;

    //print schema
    char schemaPrintBuf [1024];
    avro_writer_t jswriter = avro_writer_memory(schemaPrintBuf, 1024);
    avro_schema_to_json(schema, jswriter);
    psout("schema", schemaPrintBuf);


    //data
    int intry[] = {9, 8, 7, 6, 5, 4, 3};
    avro_datum_t int_array = avro_array(int_array_schema);
    for (i = 0; i < 7; ++i) {
        avro_datum_t vt = avro_int32(intry[i]);
        res = avro_array_append_datum(int_array, vt);
        avro_datum_decref(vt);
    }
    avro_datum_t int_map = avro_map(int_map_schema);
    res = avro_map_set(int_map, "intarray", int_array);
    avro_datum_decref(int_array);
    avro_datum_t int_a_union_datum = avro_union(int_union_schema, 0, int_map);

    //choose data
    //    rec = int_array;
    rec = int_map;
    //    rec = int_a_union_datum;

    //print data detail
    sout("");
    char * json;
    sout("rec:");
    avro_datum_to_json(rec, 0, &json);
    sout(json);

    //serialize
    char buf[1024];
    avro_writer_t writer = avro_writer_memory(buf, 1024);
    res = avro_write_data(writer, schema, rec);
    if (res) {
        psout("write result", avro_strerror());
    }
    long len = avro_size_data(writer, schema, rec);
    piout("data len", len);

    //read
    avro_reader_t reader = avro_reader_memory(buf, 1024);
    avro_datum_t rslt;
    res = avro_read_data(reader, schema, schema, &rslt);
    if (res) {
        psout("read error ", avro_strerror());
    }

    //read data
    sout("");
    sout("rslt:");
    avro_datum_to_json(rslt, 0, &json);
    sout(json);
    return 0;
}

This is my target schema: avro_schema_t schema;
And this is my data: avro_datum_t rec;

In the //choose schema and //choose data parts, if schema = int_array_schema and rec = int_array, the output is like this:

enter image description here

If schema = int_map_schema and rec = int_map, the output is like this:

enter image description here


Segmentation fault appears here: res = avro_write_data(writer, schema, rec);.

So how can I have a map with arrays as its values?


Solution

  • Your code doesn't appear to be doing anything wrong. However, since the project moved to the 'generic value' format and implemented the legacy wrappers for the new API, this looks like a bug in backwards compatibility.

    If you regenerate the schema from the json you built in your code, it works, which seems to indicate a functional regression:

    so here, if we generate a new copy of the schema:

    psout("schema", schemaPrintBuf);
    // Parse the JSON that was just printed back into a second, independent schema.
    // Avro C calls return 0 on success (the program's own `if (res)` checks rely
    // on this), so a non-zero result is the failure case.
    // The length comes from strlen() because the _literal helper sizes its
    // argument with sizeof, which for this 1024-byte buffer would cover the
    // whole buffer rather than just the JSON text. Like the %s print above,
    // this assumes schemaPrintBuf is NUL-terminated.
    avro_schema_t from_json;
    if (avro_schema_from_json_length(schemaPrintBuf, strlen(schemaPrintBuf), &from_json) != 0) {
        fprintf(stderr, "Cannot convert from json literal: %s\n", avro_strerror());
        exit(1);
    }

    // then in the write:
    res = avro_write_data(writer, from_json, rec);
    

    It no longer crashes. This is with only that single schema reference replaced; if you leave the other references to schema in place for the re-read, it appears to work.

    Alternatively, because you constructed the avro_datum_t based on a known, manually constructed schema, you could disable the writer verification, which seems to be the origin of the crash:

     // Passing NULL as the schema disables the writer-side verification of rec.
     res = avro_write_data(writer, NULL, rec);
    

    This does not crash either; the crash may stem from some complication in the schema verification.