cscanf

Why does sscanf not extract all the numbers from this string format properly?


I am parsing GNSS data and the module QuecTel BG96 I am using has UART serial interface. When trying to extract the data associated with the message most of it can be extracted using sscanf, aside from the time.

Here is an example message, the same used in the code below:

"\r\n+QGPSLOC: 010842.0,32.04415,5.31028,1.7,55.0,2,150.36,0.0,0.0,
060224,03\r\n\r\nOK\r\n"

Just for completeness, here is the interpretation of the fields, as in the manual:

+QGPSLOC:<UTC>,<latitude>,<longitude>,<hdop>,<altitude>,<fix>,<cog>,<spkm>,<spkn>,<date>,<nsat> OK

The issue is that the first set of numbers, representing the time as hhmmss.sss, which when extracted are always 0.

Expected output:

 240206-010842.000,       32.04415,         5.31028,      1.7,       55.0, 0002,       150.6000,         0.0000, 00000003

My code:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

const char* utc_datetime_format =
    "%02u%02u%02u-%02u%02u%02u.%03u";
const char* print_format =
    "%18.18s, %14.5f, %15.5f, %8.1f, %10.1f, %04u, %14.4f, "
    "%14.4f, %08u\n";

typedef struct
{
    uint64_t year : 12;
    uint64_t month : 4;
    uint64_t day : 8;
    uint64_t hour : 8;
    uint64_t minutes : 8;
    uint64_t seconds : 8;
    uint64_t msec : 16;
} DateAndTime_Compressed;

typedef struct
{
    float latitude;
    float longitude;
} GNSS_Coordinates;

typedef struct
{
    GNSS_Coordinates       coordinates;
    float                  horizontal_precision;
    float                  altitude;
    float                  course_over_ground;
    float                  speed_over_ground_mps;  // m/s
    DateAndTime_Compressed utc_time;
    uint8_t                fix_type;
    uint8_t                num_satellites;
} GNSS_Data;

int GNSS_ParseData(char* buf, GNSS_Data* data)
{
    const char* format =
        "%s %2" PRIu16 "%2" PRIu16 "%2" PRIu16 ".%3" PRIu16
        ",%f,%f,%f,%f,%" PRIu8 ",%3" PRIu16 ".%2" PRIu16
        ",%f,%f,%2" PRIu8 "%2" PRIu8 "%2" PRIu8 ",%" PRIu32
        "%s";
    char front_padding[128] = {0}, back_padding[128] = {0};

    uint16_t year, month, day, hour, minute, second,
        millisecond, cog_deg, cog_min;
    float tmp;

    // For some reason cannot extract hhmmss.sss from
    // message directly
    int ret = sscanf(
        buf, format, front_padding, &hour, &minute, &second,
        &millisecond, &(data->coordinates.latitude),
        &(data->coordinates.longitude),
        &data->horizontal_precision, &data->altitude,
        &data->fix_type, &cog_deg, &cog_min,
        &data->speed_over_ground_mps, &tmp, &day, &month,
        &year, &data->num_satellites, back_padding);
    if (ret != 19)
    {
        // Handle Error (but no error occurs here)
        return -1;
    }
    data->utc_time = (DateAndTime_Compressed){
        .year    = year,
        .month   = month,
        .day     = day,
        .hour    = hour,
        .minutes = minute,
        .seconds = second,
        .msec    = millisecond};

    data->speed_over_ground_mps *=
        (1000.0f / 3600.0f);  // kph to mps
    data->course_over_ground =
        (float)cog_deg + (((float)cog_min) / 60.0f) /
                             100.0f;  // ddd.mm to ddd.dd

    return 0;
}

int main(int argc, char** argv)
{
    char msg[] =
        "\r\n+QGPSLOC: "
        "010842.0,32.04415,5.31028,1.7,55.0,2,150.36,0.0,0."
        "0,060224,03\r\n\r\nOK\r\n";

    GNSS_Data gnss_data = {0};

    int ret = GNSS_ParseData(
        msg,
        &gnss_data);  // ret = 0 but data.utc_time wrong
                      // (dates are correct, times wrong)
    if (ret != 0)
    {
        printf("Failed to parse data\n");
        exit(-1);
    }
    char utc_date_string[64] = {0};
    snprintf(
        utc_date_string, 32, utc_datetime_format,
        gnss_data.utc_time.year, gnss_data.utc_time.month,
        gnss_data.utc_time.day, gnss_data.utc_time.hour,
        gnss_data.utc_time.minutes,
        gnss_data.utc_time.seconds,
        gnss_data.utc_time.msec);

    printf(
        print_format, utc_date_string,
        gnss_data.coordinates.latitude,
        gnss_data.coordinates.longitude,
        gnss_data.horizontal_precision, gnss_data.altitude,
        gnss_data.fix_type, gnss_data.course_over_ground,
        gnss_data.speed_over_ground_mps,
        gnss_data.num_satellites);

    return 0;
}

Solution

  • There are two types of error in your format string for sscanf():

    1. You use PRN... instead of SCN.... The former are for the printf() family, and you need to use the latter. See below for the differences.
    2. The used widths of the format specifiers do not match the widths of the variables.

    So the correct format is:

        const char* format =
            "%s %2" SCNu16 "%2" SCNu16 "%2" SCNu16 ".%3" SCNu16
            ",%f,%f,%f,%f,%" SCNu8 ",%3" SCNu16 ".%2" SCNu16
            ",%f,%f,%2" SCNu16 "%2" SCNu16 "%2" SCNu16 ",%" SCNu8
            "%s";
    

    NOTE: Please use always the maximum set of warning flags for your compiler.


    Now, why do you get zeroes for some time variables?

    For this we assume that you are using a standard compiler on a current system, so I use GCC on Windows 10 for investigation.

    This little test program shows the differences between the format constants:

    #include <inttypes.h>
    #include <stdio.h>
    
    int main(void) {
      printf("PRIu16 = \"%s\"\n", PRIu16);
      printf("SCNu16 = \"%s\"\n", SCNu16);
      uint16_t canary1 = 0x1122, value, canary2 = 0x5566;
      // 0x3344 = 13124
      printf("sscanf() = %d\n", sscanf("13124", "%" PRIu16, &value));
      printf("canary1 @%p = %04X\n", (void*)&canary1, canary1);
      printf("value   @%p = %04X\n", (void*)&value, value);
      printf("canary2 @%p = %04X\n", (void*)&canary2, canary2);
      return 0;
    }
    

    Compilation with a common set of warning flags tells us already that something is wrong:

    > gcc -Wall -pedantic formats.c -o formats.exe
    formats.c: In function 'main':
    formats.c:9:45: warning: format '%u' expects argument of type 'unsigned int *', but argument 3 has type 'uint16_t *' {aka 'short unsigned int *'} [-Wformat=]
       printf("sscanf() = %d\n", sscanf("13124", "%" PRIu16, &value));
                                                 ^~~         ~~~~~~
    In file included from formats.c:1:
    C:/Program Files/mingw-w64/x86_64-8.1.0-posix-seh-rt_v6-rev0/mingw64/x86_64-w64-mingw32/include/inttypes.h:92:17: note: format string is defined here
     #define PRIu16 "u"
    formats.c:9:45: warning: format '%u' expects argument of type 'unsigned int *', but argument 3 has type 'uint16_t *' {aka 'short unsigned int *'} [-Wformat=]
       printf("sscanf() = %d\n", sscanf("13124", "%" PRIu16, &value));
                                                 ^~~         ~~~~~~
    In file included from formats.c:1:
    C:/Program Files/mingw-w64/x86_64-8.1.0-posix-seh-rt_v6-rev0/mingw64/x86_64-w64-mingw32/include/inttypes.h:92:17: note: format string is defined here
     #define PRIu16 "u"
    

    And the result reproduces your observation, canary1 is zeroed:

    > formats.exe
    PRIu16 = "u"
    SCNu16 = "hu"
    sscanf() = 1
    canary1 @000000000061FE1E = 0000
    value   @000000000061FE1C = 3344
    canary2 @000000000061FE1A = 5566
    

    We see that PRIu16 is actually "u", which sscanf() uses to write to an unsigned int. On our systems, this has usually the same width as uint32_t. So sscanf() happily writes 4 bytes, overwriting canary1.

    Which variable gets overwritten depends on the order of the variables in the memory and the sequence by which the values are stored. None of these order/sequence are defined by the standard, so I used two canaries on both sides.

    This explains the zeroes you observe.


    But why is PRIu16 associated with unsigned int, when SCNu16 is associated with unsigned short int?

    The secret is the integer promotion of integer arguments on variadic functions. And printf() is such a variadic function.

    If you give a uint16_t value to printf(), it will be converted to an int. As int commonly suffices to hold all potential values of uint16_t, the conversion result is not an unsigned int. However, the bit pattern is identical for both types in the value range of uint16_t.

    So printf() needs "u" to print such values correctly.

    On the other hand, scanf() must not write beyond the space of the provided variable. So it needs to know the width, which leads to "hu".