Tags: c++, c, api, rdp

How to read a file with Unicode contents


How can I read a file with Unicode contents using C/C++?

I used the ReadFile function to read a file with Unicode contents, but it doesn't produce the expected output. I want a buffer that contains the entire contents of the file.

I use this code:

#include <Windows.h>

/*
 * Reads the raw bytes of "file" into a zero-filled heap buffer.
 *
 * The bytes are stored exactly as they appear on disk; no character-set
 * conversion is performed. The buffer is allocated sizeof(WCHAR) bytes
 * larger than the file so the data is always followed by zero bytes.
 *
 * Returns 0 when the file was read, 1 when it could not be opened,
 * sized, allocated for, or read.
 */
int main()
{
    const DWORD terminatorBytes = sizeof(WCHAR);
    HANDLE hndlRead;
    CHAR* szReadBuffer = NULL;
    DWORD fileSize;   // GetFileSize returns a DWORD; a signed INT can misrepresent it
    DWORD nb = 0;     // number of bytes ReadFile actually delivered
    int exitCode = 1;

    hndlRead = CreateFileW(L"file", GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (hndlRead == INVALID_HANDLE_VALUE)
    {
        return exitCode;
    }

    // With a NULL second argument only the low 32 bits of the size are
    // reported, and INVALID_FILE_SIZE signals failure.
    fileSize = GetFileSize(hndlRead, NULL);

    // The second test keeps "fileSize + terminatorBytes" from wrapping around.
    if (fileSize != INVALID_FILE_SIZE && fileSize <= MAXDWORD - terminatorBytes)
    {
        szReadBuffer = (CHAR*) HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, (SIZE_T)fileSize + terminatorBytes);
    }

    if (szReadBuffer != NULL)
    {
        // The handle was opened for synchronous I/O, so no OVERLAPPED is needed.
        if (ReadFile(hndlRead, szReadBuffer, fileSize, &nb, NULL))
        {
            // put breakpoint here and inspect nb and szReadBuffer
            exitCode = 0;
        }

        HeapFree(GetProcessHeap(), 0, szReadBuffer);   // release the buffer
    }

    CloseHandle(hndlRead);   // release the file handle on every path

    return exitCode;
}

Is there any way to read this file correctly?

This is nb and szReadBuffer:

enter image description here

This is my file content in Notepad++:

enter image description here


Solution

  • Your code works fine. It reads the rdp file verbatim into memory.

    You are troubled by the BOM (byte order mark) at the beginning of the rdp file.

    If you look at the rdp file with a text editor (Notepad, for instance) you will see this:

    screen mode id:i:2
    use multimon:i:0
    desktopwidth:i:2560
    desktopheight:i:1600
    ....
    

    If you look at the rdp file with a hexadecimal editor you will see this:

    0000 FFFE 7300 6300 7200 6500 6500 6E00 2000 ..s.c.r.e.e.n. .
    0008 6D00 6F00 6400 6500 2000 6900 6400 3A00 m.o.d.e. .i.d...
    ....
    

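    The leading bytes FF FE are the byte order mark (the character U+FEFF stored in little-endian order). It indicates that the file is a text file encoded in little-endian UTF-16 (what Windows calls "Unicode"), so each character takes 2 bytes instead of 1 byte.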

    Once the file has been read into memory you will get this (0x00318479 being the address szReadBuffer points to):

    enter image description here

    Corrected program:

    #include <Windows.h>
    #include <stdlib.h>
    
    /*
     * Reads the whole of "rdp.RDP" into a zero-terminated WCHAR buffer and
     * locates the text that follows the byte order mark, if one is present.
     *
     * Returns 0 when the file was read, 1 when it could not be opened,
     * sized, allocated for, or read.
     */
    int main()
    {
      int exitCode = 1;
      WCHAR* szReadBuffer = NULL;   // WCHAR instead of CHAR
      DWORD fileSize;               // GetFileSize returns a DWORD, not an INT
      DWORD nb = 0;                 // number of bytes ReadFile actually delivered
    
      HANDLE hndlRead = CreateFileW(L"rdp.RDP", GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
      if (hndlRead == INVALID_HANDLE_VALUE)
      {
        return exitCode;
      }
    
      // With a NULL second argument only the low 32 bits of the size are
      // reported, and INVALID_FILE_SIZE signals failure.
      fileSize = GetFileSize(hndlRead, NULL);
      if (fileSize != INVALID_FILE_SIZE)
      {
        // Two extra WCHARs: one rounds an odd byte count up, one is the NUL
        // string terminator. calloc zero-fills and rejects a size overflow.
        szReadBuffer = (WCHAR*)calloc((size_t)fileSize / sizeof(WCHAR) + 2, sizeof(WCHAR));
      }
    
      if (szReadBuffer != NULL && ReadFile(hndlRead, szReadBuffer, fileSize, &nb, NULL))
      {
        WCHAR *textwithoutbom = szReadBuffer;
    
        // skip BOM, but only when the file really starts with one
        if (nb >= sizeof(WCHAR) && szReadBuffer[0] == 0xFEFF)
        {
          textwithoutbom = szReadBuffer + 1;
        }
    
        // put breakpoint here and inspect textwithoutbom
        (void)textwithoutbom;
    
        exitCode = 0;
      }
    
      CloseHandle(hndlRead);   // close what we have opened
      free(szReadBuffer);      // free what we have allocated (free(NULL) is a no-op)
    
      return exitCode;
    }