c++ newline iostream utf-16 codecvt

C++ iostream UTF-16 file I/O with CR LF translation


I want to read and write UTF-16 files that use CR LF line separators (L"\r\n"), using C++ iostreams (Microsoft Visual Studio 2010). I want every L"\n" written to the stream to be translated to L"\r\n" transparently. Using the codecvt_utf16 locale facet requires opening the fstream in ios::binary mode, which loses the usual text-mode \n to \r\n translation.

// Open the file in binary mode, which the codecvt_utf16 facet needs.
std::wofstream wofs;
wofs.open("try_utf16.txt", std::ios::binary);
// Imbue a locale whose code-conversion facet is codecvt_utf16 with the
// generate_header option.
wofs.imbue(
    std::locale(
        wofs.getloc(),
        new std::codecvt_utf16<wchar_t, 0x10ffff, std::generate_header>));
wofs << L"Hi!\n"; // I want a '\r' to be inserted before the '\n' in the output file
wofs.close();

I want a solution that does not require extra libraries such as Boost.


Solution

  • I think I've found a solution myself, and I want to share it. Your comments are welcome!

    #include <iostream>
    #include <fstream>
    
    class wcrlf_filebuf : public std::basic_filebuf<wchar_t>
    {
        typedef std::basic_filebuf<wchar_t> BASE;
        wchar_t awch[128];
        bool bBomWritten;
    public:
        wcrlf_filebuf() 
            : bBomWritten(false)
        { memset(awch, 0, sizeof awch); }
    
        wcrlf_filebuf(const wchar_t *wszFilespec, 
                      std::ios_base::open_mode _Mode = std::ios_base::out) 
            : bBomWritten(false)
        {
            memset(awch, 0, sizeof awch);
            BASE::open(wszFilespec, _Mode | std::ios_base::binary);
            pubsetbuf(awch, _countof(awch));
        }
    
        wcrlf_filebuf *open(const wchar_t *wszFilespec, 
                            std::ios_base::open_mode _Mode = std::ios_base::out)
        {   
            BASE::open(wszFilespec, _Mode | std::ios_base::binary);
            pubsetbuf(awch, _countof(awch));
            return this;
        }
    
        virtual int_type overflow(int_type ch = traits_type::eof())
        {
            if (!bBomWritten) {
                bBomWritten = true;
                int_type iRet = BASE::overflow(0xfeff);
                if (iRet != traits_type::not_eof(0xfeff)) return iRet;
            }
            if (ch == '\n') {
                int_type iRet = BASE::overflow('\r');
                if (iRet != traits_type::not_eof('\r')) return iRet;
            }
            return BASE::overflow(ch);
        }
    };
    
    class wcrlfofstream : public std::wostream
    {
        typedef std::wostream BASE;
    public:
        wcrlfofstream(const wchar_t *wszFilespec, 
                      std::ios_base::open_mode _Mode = std::ios_base::out) 
            : std::wostream(new wcrlf_filebuf(wszFilespec, _Mode))
        {}
    
        wcrlf_filebuf* rdbuf()
        {
            return dynamic_cast<wcrlf_filebuf*>(std::wostream::rdbuf());
        }
    
        void close()
        {
            rdbuf()->close();
        }
    };