c gcc fread mingw32

What is the standard behavior of fread in C on Windows?


Nominally, I expected fread on a stream opened in "rb" mode to deliver carriage return and linefeed as-is (\r as "\r", \n as "\n"). But my MinGW compiler downloaded from "Winlib" keeps behaving differently. Sometimes it converts \n to "\r\n", and sometimes the count returned by the read matches neither the collapsed nor the expanded form of the end-of-line sequence.

I wonder what the standard says about the first part (EOL handling for "rb") and the second part (the size returned by the read). I suspect it's a mingw32 gcc bug, but any experience is welcome.

(P.S. I ended up writing a complicated read() abstraction in C because of this... but I'm beginning to wonder whether I should just download clang or MSVC at this point, if that is the case.)

EDIT / FOLLOWUP (1):

I wrote a Go program to act as the "hex dump" because I did not have one. In "r" mode, it consistently expands an unpaired "\n" to "\r\n". The link from commenter BoP did suggest that "r" is OS-dependent, so it's not a portable solution then.

Mode "r"
stdout:         "Hello World!\r\n" // reading live using binary.exe (.c_)
expected:       "Hello World!\n"   // static test string in .go source code
self (13):      "Hello World!\n"   // reading live using .go
                status: 0          // fail
stdout:         "Hello World!\r\n\r\n"
expected:       "Hello World!\n\n"
self (14):      "Hello World!\n\n"
                status: 0
stdout:         "Hello World!\r\n\r\n\r\n"
expected:       "Hello World!\n\n\n"
self (15):      "Hello World!\n\n\n"
                status: 0
stdout:         "Hello World!\r\n\r\n"
expected:       "Hello World!\n\r\n"
self (15):      "Hello World!\n\r\n"
                status: 0
...

Meanwhile, "rb" mode expands even paired "\r\n" into "\r\r\n".

// Mode "rb" (I'll only show unique ones)
stdout:         "Hello World!\r\r\n"
expected:       "Hello World!\r\n"
self (14):      "Hello World!\r\n"
                status: 0
stdout:         "Hello World!\r\r\n\r"
expected:       "Hello World!\r\n\r"
self (15):      "Hello World!\r\n\r"
                status: 0
stdout:         "Hello World!\r\n\r\r\n"
expected:       "Hello World!\n\r\n"
self (15):      "Hello World!\n\r\n"
                status: 0
...

I guess it's just driving me crazy that no ISO/POSIX standard makes "r" mode consistently turn everything into just "\n". Nor does "rb" mode really read as-is (why is there EOL handling in binary mode!!). Would clang or MSVC act differently, or is it the syscall?

Update: This is for the commenter user2357112... who asked me for the test code.

gcc ex07/binary.c -o ex07/binary.exe
go run ex07/main.go
rm ex07/binary.exe
#include <stdio.h> // import { printf }
#include <stdlib.h> // import { exit }

/*
 * Print up to 127 bytes of the file named by av[1] to stdout.
 *
 * The file is opened in binary mode ("rb"), so fread delivers its bytes
 * untranslated. The bytes are then written with printf to stdout, which is
 * a text-mode stream by default: on Windows each '\n' written is emitted as
 * "\r\n", so the program's output is not a faithful copy of what was read.
 *
 * Exits with status 1 if the file cannot be opened or read. Does nothing
 * and returns 0 when the argument count is not exactly one.
 */
int main(int ac, char** av) {
    if (ac == 2) {
        FILE* fd = fopen(av[1], "rb");
        if (fd == NULL) {
            perror(av[1]);
            exit(1);
        }
        char buffer[128];
        /* fread returns a size_t item count and never -1; a short count
         * means end-of-file or an error, which ferror tells apart. */
        size_t gcount = fread(buffer, 1, sizeof buffer - 1, fd);
        if (ferror(fd)) {
            fclose(fd);
            exit(1);
        }
        fclose(fd);
        /* One byte of the buffer is reserved for the terminator. */
        buffer[gcount] = '\0';
        printf("%s", buffer);
    }
    return 0;
}
package main

import (
    "bytes"
    "fmt"
    "os"
    "os/exec"
    "strings"
    "strconv"
)

// runBinary executes ex07/binary.exe with the given arguments and returns
// everything the child wrote to stdout and to stderr, together with the
// error reported by Run.
func runBinary(args ...string) (string, string, error) {
    var stdoutBuf bytes.Buffer
    var stderrBuf bytes.Buffer

    child := exec.Command("ex07/binary.exe", args...)
    child.Stdout, child.Stderr = &stdoutBuf, &stderrBuf

    runErr := child.Run()
    return stdoutBuf.String(), stderrBuf.String(), runErr
}

func selfTest(filename string) (string, int) {

    fd, _ := os.Open(filename)
    self_byte := make([]byte, 128)
    gcount, _ := fd.ReadAt(self_byte, 0)
    self_string := string(self_byte)
    fd.Close()
    return self_string, gcount
}

// runTest writes test_string to ex07/file.txt, runs the C binary on that
// file, and prints three views with CR and LF shown as \r and \n: what the
// binary wrote to stdout, the expected text, and what a direct read of the
// file returns (with its byte count). It then prints "status: 1" when the
// binary's stdout equals the expected text and "status: 0" otherwise.
// The fixture file is removed before returning.
func runTest(test_string string) {
    const path = "ex07/file.txt"

    // Makes CR and LF visible in the printed report.
    escape := strings.NewReplacer("\r", "\\r", "\n", "\\n")

    fd, err := os.Create(path)
    if err != nil {
        fmt.Fprintln(os.Stderr, "cannot create "+path+": "+err.Error())
        return
    }
    defer os.Remove(path)

    // Close the fixture before anything reads it, so the readers never see
    // a file that is still open for writing.
    _, writeErr := fd.WriteString(test_string)
    closeErr := fd.Close()
    if writeErr != nil || closeErr != nil {
        fmt.Fprintln(os.Stderr, "cannot write "+path)
        return
    }

    stdout, _, _ := runBinary(path)
    stdout = escape.Replace(stdout)
    expected := escape.Replace(test_string)

    fmt.Println("stdout:\t\t\"" + stdout + "\"")
    fmt.Println("expected:\t\"" + expected + "\"")

    self_string, gcount := selfTest(path)
    self_string = escape.Replace(self_string)
    fmt.Println("self ("+strconv.Itoa(gcount)+"):\t\"" + self_string + "\"")

    // 1 means pass, 0 means fail.
    status := 0
    if stdout == expected {
        status = 1
    }
    fmt.Println("\t\tstatus: " + strconv.Itoa(status))
}

// main feeds each end-of-line fixture to runTest, in a fixed order: no
// line ending, single endings, repeated endings, then mixed CR/LF runs.
func main() {
    fixtures := []string{
        "Hello World!",
        "Hello World!\n",
        "Hello World!\r\n",
        "Hello World!\r",

        "Hello World!\r\r",
        "Hello World!\r\r\r",
        "Hello World!\n\n",
        "Hello World!\n\n\n",

        "Hello World!\r\n\r",
        "Hello World!\n\r\n",
        "Hello World!\r\n\r\n",
        "Hello World!\n\r\n\r",
    }
    for _, fixture := range fixtures {
        runTest(fixture)
    }
}

Solution: Thanks to Charlie, Weather Vane and user###

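Modify the C program to print each \r as the literal two-character text \r (written "\\r" in C) and each \n as the literal text \n (written "\\n"), in a loop, so that printf has no end-of-line characters to translate. It now passes all tests.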

#include <stdio.h> // import { printf }
#include <stdlib.h> // import { exit }

/*
 * Print up to 127 bytes of the file named by av[1] to stdout, with every
 * CR written as the two characters \r and every LF as the two characters
 * \n. Because no raw end-of-line byte reaches stdout, the text-mode output
 * stream has nothing to translate.
 *
 * Exits with status 1 if the file cannot be opened or read. Does nothing
 * and returns 0 when the argument count is not exactly one.
 */
int main(int ac, char** av) {
    if (ac == 2) {
        FILE* fd = fopen(av[1], "rb");
        if (fd == NULL) {
            perror(av[1]);
            exit(1);
        }
        char buffer[128];
        /* fread returns a size_t item count and never -1; a short count
         * means end-of-file or an error, which ferror tells apart. */
        size_t gcount = fread(buffer, 1, 127, fd);
        if (ferror(fd)) {
            fclose(fd);
            exit(1);
        }
        fclose(fd);

        // Replace printf with this:
        // Iterate by count so a NUL byte in the file does not end the dump.
        for (size_t i = 0; i < gcount; i++) {
            if (buffer[i] == '\r') {
                fputs("\\r", stdout);
            } else if (buffer[i] == '\n') {
                fputs("\\n", stdout);
            } else {
                putchar((unsigned char)buffer[i]);
            }
        }
        // end
    }
    return 0;
}

Solution

  • If the stream is opened in binary mode ("rb" mode string), fread should not perform any translation and the return value should be the exact number of bytes read divided by the element length specified in the call. Note that the Microsoft library does not implement the translation in fread itself, but in the underlying read API and the translation can be much more than just end-of-line handling.

    The documentation for fread is rather straightforward and likely incomplete but that of fwrite mentions Unicode handling on text streams that is non-trivial. Programming in C on these targets is a nightmare.

    Update: your C program reads the file contents in binary mode, but you output to stdout using printf, and stdout is by default in text mode with eol translation, so \n in the buffer gets translated on output to \r\n and \r\n (as read by fread in binary mode) gets translated to \r\r\n which is confusing.

    Analyzing the output of the C program does not tell you what fread did, because writing to stdout has side effects. To analyse the data read by fread, write the hex dump code in C and output as text. Also note that fread does not return -1 upon error.

    Try this test program:

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    /*
     * Hex dump of the file named by av[1], read in binary mode.
     *
     * Each output line covers 16 bytes: a 6-digit hexadecimal offset, two
     * spaces, the bytes as two-digit hex values separated by spaces (with a
     * '-' between the 8th and 9th), two spaces, then the same bytes as
     * characters, with '.' standing in for anything outside ' '..'~'.
     *
     * Returns 1 on a usage error, if the file cannot be opened, or if a
     * read error occurs; 0 otherwise. An empty file produces no output.
     */
    int main(int ac, char** av) {
        if (ac < 2) {
            printf("usage: %s filename\n", av[0]);
            return 1;
        }
        FILE *fp = fopen(av[1], "rb");
        if (fp == NULL) {
            fprintf(stderr, "cannot open %s: %s\n",
                    av[1], strerror(errno));
            return 1;
        }
        char buffer[128];
        /* A full line is 8 + 49 + 16 = 73 characters plus the terminator. */
        char output[80];
        size_t nread;
        unsigned int addr = 0;  /* unsigned to match the %X conversion */
        int pos = 0, hex = 0, bin = 0;
        const char hexdigits[] = "0123456789ABCDEF";
        while ((nread = fread(buffer, 1, sizeof buffer, fp)) != 0) {
            for (size_t i = 0; i < nread; i++) {
                /* A completed line is flushed only when the next byte
                 * arrives, so the last line is printed after the loop. */
                if (pos == 16) {
                    output[bin] = '\0';
                    printf("%s\n", output);
                    pos = 0;
                }
                if (pos == 0) {
                    /* Start a line: offset, then a blank hex area that
                     * already holds the '-' separator. hex and bin index
                     * the next hex slot and the next character slot. */
                    hex = snprintf(output, sizeof(output), "%06X  ", addr);
                    bin = hex + snprintf(output + hex, sizeof(output) - (size_t)hex,
                                         "%23s-%23s  ", "", "");
                }
                unsigned char c = buffer[i];
                output[hex + 0] = hexdigits[c >> 4];
                output[hex + 1] = hexdigits[c & 15];
                hex += 3;
                output[bin++] = (c >= ' ' && c < 0x7F) ? c : '.';
                pos++;
                addr++;
            }
        }
        int status = 0;
        /* fread returns 0 both at end-of-file and on error. */
        if (ferror(fp)) {
            fprintf(stderr, "error reading %s\n", av[1]);
            status = 1;
        }
        /* pos is nonzero exactly when a partial or full line is pending. */
        if (pos > 0) {
            output[bin] = '\0';
            printf("%s\n", output);
        }
        fclose(fp);
        return status;
    }