Tags: c, file, binary, fopen, dos2unix

How to convert a text file from DOS format to UNIX format


I am trying to write a C program that reads a text file and replaces \r\n with \n in the same file, converting the line endings from DOS to UNIX. I use fgetc and treat the file as a binary file. Thanks in advance.

#include <stdio.h>

/*
 * Convert "textfile.txt" from DOS (CRLF) to UNIX (LF) line endings.
 *
 * The converted bytes are staged in a temporary file and then copied back
 * over the original. Reading and writing through one "rb+" stream would need
 * a repositioning call between every change of direction, and standard C has
 * no way to shorten the file afterwards, so staging is the simplest correct
 * approach that needs nothing beyond <stdio.h>.
 *
 * Only a '\r' immediately followed by '\n' is dropped; a lone '\r' is kept.
 *
 * Returns 0 on success and 1 on any failure (an error message is printed).
 * Note: if the copy-back step fails, the original file may be left truncated.
 */
int main(void)
{
    const char *path = "textfile.txt";
    FILE *in = NULL;
    FILE *tmp = NULL;
    FILE *out = NULL;
    int ch;          /* int, not char, so EOF stays distinguishable from data */
    int status = 1;  /* assume failure until every step has succeeded */

    in = fopen(path, "rb");
    if (in == NULL)
    {
        goto done;
    }

    tmp = tmpfile();
    if (tmp == NULL)
    {
        goto done;
    }

    /* Pass 1: copy the input to the staging file, collapsing "\r\n" to "\n". */
    while ((ch = fgetc(in)) != EOF)
    {
        if (ch == '\r')
        {
            int next = fgetc(in);
            if (next == '\n')
            {
                ch = '\n';
            }
            else if (next != EOF)
            {
                /* Not a CRLF pair: keep the '\r' and re-read the next byte. */
                ungetc(next, in);
            }
        }
        if (fputc(ch, tmp) == EOF)
        {
            goto done;
        }
    }
    if (ferror(in))
    {
        goto done;
    }
    fclose(in);
    in = NULL;

    /* Pass 2: truncate the original and copy the converted bytes back. */
    out = fopen(path, "wb");
    if (out == NULL)
    {
        goto done;
    }
    rewind(tmp);
    while ((ch = fgetc(tmp)) != EOF)
    {
        if (fputc(ch, out) == EOF)
        {
            goto done;
        }
    }
    if (ferror(tmp))
    {
        goto done;
    }

    /* fclose() flushes buffered output, so its result decides success. */
    if (fclose(out) == 0)
    {
        status = 0;
    }
    out = NULL;

done:
    if (status != 0)
    {
        printf("erro ficheiro \n");
    }
    if (out != NULL)
    {
        fclose(out);
    }
    if (tmp != NULL)
    {
        fclose(tmp);
    }
    if (in != NULL)
    {
        fclose(in);
    }
    return status;
}

Solution

  • If we assume the file uses a single-byte character set, we just need to ignore all the '\r' characters when converting a text file from DOS to UNIX.

    We also assume that the size of the file is less than the highest unsigned integer.

    We make these assumptions to keep the example short.

    Be aware that the example below overwrites the original file, as you asked. Normally you shouldn't do this, as you can lose the contents of the original file, if an error occurs.

    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/stat.h>
    
    // Strip every '\r' byte from "textfile.txt", rewriting the file in place.
    //
    // Return a negative number on failure and 0 on success:
    //   -1  stat() failed, or the file size does not fit in a size_t
    //   -2  out of memory
    //   -3  the file could not be opened for reading
    //   -4  the file could not be read completely
    //   -5  the file could not be opened for writing
    //   -6  the converted contents could not be written completely
    int main(void)
    {
        const char *filename = "textfile.txt";

        // Get the file size and make sure the whole file fits in one buffer.
        struct stat info;
        if (stat(filename, &info) != 0)
            return -1;
        if (info.st_size < 0 || (unsigned long long)info.st_size > (size_t)-1)
            return -1;
        size_t filesize = (size_t)info.st_size;

        // An empty file has nothing to convert. Returning early also avoids
        // malloc(0), which may legitimately return NULL, and a zero-length
        // fread(), both of which would otherwise be misreported as errors.
        if (filesize == 0)
            return 0;

        // Allocate memory for reading the file
        char *content = malloc(filesize);
        if (content == NULL)
            return -2;

        // Open the file for reading
        FILE *fptr = fopen(filename, "rb");
        if (fptr == NULL) {
            free(content);
            return -3;
        }

        // Read the whole file and close it
        size_t count = fread(content, 1, filesize, fptr);
        fclose(fptr);
        if (count != filesize) {
            free(content);
            return -4;
        }

        // Remove all '\r' characters, compacting the buffer in place
        size_t newsize = 0;
        for (size_t i = 0; i < filesize; ++i) {
            char ch = content[i];
            if (ch != '\r') {
                content[newsize] = ch;
                ++newsize;
            }
        }

        // Only rewrite the file if at least one '\r' was removed
        int result = 0;
        if (newsize != filesize) {
            // Open the file for writing and truncate it. Note that if an error
            // occurs from here on, the original contents of the file are lost.
            fptr = fopen(filename, "wb");
            if (fptr == NULL) {
                result = -5;
            } else {
                size_t written = fwrite(content, 1, newsize, fptr);
                // fclose() flushes buffered data, so it can fail too.
                if (fclose(fptr) != 0 || written != newsize)
                    result = -6;
            }
        }

        free(content);
        return result;
    } // main()
    

    To remove only a '\r' that is immediately followed by '\n', replace the loop with this one:

        // Collapse every "\r\n" pair into a single '\n'; a lone '\r' is kept.
        // The buffer is compacted in place and newsize ends up as its new length.
        size_t newsize = 0;
        for (size_t i = 0; i < filesize; ++i) {
            char ch = content[i];
            // `i + 1 < filesize` guards the look-ahead without the unsigned
            // wrap-around that `filesize - 1` would have for an empty buffer.
            if (ch == '\r' && i + 1 < filesize && content[i + 1] == '\n') {
                ch = '\n';
                ++i; // skip the '\n' that was just consumed
            }
            content[newsize++] = ch;
        }