jsonzig

Parsing an array of JSON objects in Zig


I am trying to parse a .json file in Zig, but I am very new to the language and have had little luck finding documentation for this. Here is the code I am working with:

const std = @import("std");

/// One record of the search item listing that `main` tries to decode.
/// In this version every field is declared as a byte slice (`[]const u8`).
pub const SearchIndexEntry = struct {
    task_id: []const u8,
    client_thumbprint: []const u8,
    search_item_timestamp: []const u8,
    search_item: []const u8,
    search_results: []const u8,
    search_results_captured: []const u8,
    sum_bundle: []const u8,
    num_outlier: []const u8,
    percent_complete: []const u8,
    // NOTE(review): p0..p100, mean and std look like distribution statistics — confirm against the data source.
    p0: []const u8,
    p25: []const u8,
    p50: []const u8,
    p75: []const u8,
    p100: []const u8,
    mean: []const u8,
    std: []const u8,
};

/// Downloads the search item listing over HTTP, attempts to decode the
/// response body with `std.json`, and prints the parsed result to stderr.
pub fn main() !void {
    // One arena backs every allocation below and is released in a single step on exit.
    var arena = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    var client: std.http.Client = .{ .allocator = allocator };
    defer client.deinit();

    const endpoint = try std.Uri.parse("https://storage.googleapis.com/owlrepo/v1/queries/search_item_listing.json");

    // Buffer handed to the client for the server's response headers (32 KiB).
    const header_storage = try allocator.alloc(u8, 8 * 1024 * 4);

    var req = try client.open(.GET, endpoint, .{
        .server_header_buffer = header_storage,
        .headers = .{
            .content_type = .{ .override = "application/json" },
        },
    });
    defer req.deinit();

    try req.send();
    try req.wait();

    // Read the entire response body into memory.
    const json_str = try req.reader().readAllAlloc(allocator, std.math.maxInt(usize));
    defer allocator.free(json_str);

    // This is where it's failing:
    const data = try std.json.parseFromSlice(SearchIndexEntry, allocator, json_str, .{});
    defer data.deinit();

    // Nothing follows, so printing here happens at the same point a deferred print would.
    std.debug.print("{any}", .{data});
}

This is the error message I'm getting. My assumption is that it's because the .json file is an array of objects?

    error: UnexpectedToken
/home/a/dl/zig-dl/lib/std/json/static.zig:334:53: 0x123d778 in innerParse__anon_13392 (main)
            if (.object_begin != try source.next()) return error.UnexpectedToken;
                                                    ^
/home/a/dl/zig-dl/lib/std/json/static.zig:149:19: 0x1152e34 in parseFromTokenSourceLeaky__anon_9602 (main)
    const value = try innerParse(T, allocator, scanner_or_reader, resolved_options);
                  ^
/home/a/dl/zig-dl/lib/std/json/static.zig:116:20: 0x112a5d5 in parseFromTokenSource__anon_5642 (main)
    parsed.value = try parseFromTokenSourceLeaky(T, parsed.arena.allocator(), scanner_or_reader, options);
                   ^
/home/a/dl/zig-dl/lib/std/json/static.zig:82:5: 0x111c5a1 in parseFromSlice__anon_3480 (main)
    return parseFromTokenSource(T, allocator, &scanner, options);
    ^
/home/a/c/owldepo/scrapper/main.zig:35:18: 0x1116cec in main (main)
    const data = try std.json.parseFromSlice(SearchIndexEntry, allocator, json_str, .{});

Solution

  • The response JSON contains an array of thousands of objects, but you're trying to parse the JSON as if it were a single object.

    Use []SearchIndexEntry in parseFromSlice:

    // The top-level JSON value is an array, so ask for a slice of entries.
    const data = try std.json.parseFromSlice([]SearchIndexEntry, allocator, json_str, .{});
    defer data.deinit();

    // `data.value` is the parsed slice; report its length and show the first entry.
    const entries = data.value;
    std.debug.print("objects: {}\n", .{entries.len});
    std.debug.print("first: {any}\n", .{entries[0]});
    

    But then you'll still get an UnexpectedToken error. This time it is because the field types in SearchIndexEntry don't match the JSON: every field is declared as a string ([]const u8), but many of the values are numbers or null.

    Working out the correct type for each field can be tricky (for example, search_results can be null). You can test one field at a time: reduce the struct to just that field and pass the .ignore_unknown_fields = true option to parseFromSlice, so the parser skips every other key.

    The final struct looks like this:

    /// Entry type whose field types match the values in the response JSON.
    /// Fields declared with `?` accept a JSON `null`.
    const SearchIndexEntry = struct {
        task_id: []const u8,
        // Nullable string.
        client_thumbprint: ?[]const u8,
        search_item_timestamp: []const u8,
        search_item: []const u8,
        // Nullable: search_results can be null in the JSON.
        search_results: ?i32,
        search_results_captured: i32,
        sum_bundle: i32,
        num_outlier: i32,
        // Nullable float.
        percent_complete: ?f32,
        // NOTE(review): p0..p100, mean and std look like distribution statistics — confirm against the data source.
        p0: i64,
        p25: i64,
        p50: i64,
        p75: i64,
        p100: i64,
        mean: i64,
        std: i64,
    };
    

    Now it works:

    $ zig build run
    objects: 17257
    first: main.SearchIndexEntry{ .task_id = { 54, 50, 98, 57, 56, 55, 56, 49, 45, 52, 55, 98, 98, 45, 52, 98, 49, 101, 45, 56, 54, 56, 101, 45, 98,
    53, 101, 52, 102, 100, 57, 57, 100, 54, 98, 101 }, .client_thumbprint = { 95, 66, 57, 120, 56, 85, 117, 120, 113, 116, 117, 65, 74, 102, 71, 111,
     114, 85, 88, 109, 81, 65, 116, 70, 99, 71, 122, 71, 45, 57, 72, 117, 56, 99, 74, 76, 83, 120, 67, 120, 57, 117, 99 }, .search_item_timestamp = {
     50, 48, 50, 52, 45, 49, 50, 45, 50, 53, 84, 48, 50, 58, 48, 57, 58, 50, 56, 43, 48, 48, 58, 48, 48 }, .search_item = { 68, 97, 114, 107, 32, 115
    , 99, 114, 111, 108, 108, 32, 102, 111, 114, 32, 79, 118, 101, 114, 97, 108, 108, 32, 65, 114, 109, 111, 114, 32, 102, 111, 114, 32, 76, 85, 75,
    32, 51, 48, 37 }, .search_results = 4, .search_results_captured = 4, .sum_bundle = 27, .num_outlier = 0, .percent_complete = 1e0, .p0 = 3499999,
    .p25 = 3716666, .p50 = 3966666, .p75 = 4444444, .p100 = 4888888, .mean = 4080555, .std = 504631 }