c++ winapi gdi uniscribe

How to tell if a surrogate pair Unicode character is supported by the font?


I was able to find the following method of determining whether a UTF-16 character in the Basic Multilingual Plane is supported by a font. Unfortunately, that doesn't work for characters encoded as surrogate pairs, since the WCRANGE struct filled in by the GetFontUnicodeRanges function holds only WCHAR (16-bit) values.

Here's an example of what I'm trying to do:

LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam, LPARAM lParam)
{
    static HFONT hFont = NULL;

    switch (message)
    {
        case WM_CREATE:
        {
            LOGFONT lf = {0};
            lf.lfHeight = -64;
            ::StringCchCopy(lf.lfFaceName, _countof(lf.lfFaceName), L"Arial");

            hFont = ::CreateFontIndirect(&lf);
        }
        break;

        case WM_PAINT:
        {
            PAINTSTRUCT ps;
            HDC hdc = BeginPaint(hWnd, &ps);

            RECT rcClient = {0};
            ::GetClientRect(hWnd, &rcClient);

            HGDIOBJ hOldFont = ::SelectObject(hdc, hFont);

            LPCTSTR pStr = L">\U0001F609<";
            int nLn = wcslen(pStr);

            RECT rc = {20, 20, rcClient.right, rcClient.bottom};
            ::DrawText(hdc, pStr, nLn, &rc, DT_NOPREFIX | DT_SINGLELINE);

            ::SelectObject(hdc, hOldFont);

            EndPaint(hWnd, &ps);
        }
        break;

    //....

If I run it on Windows 10, I get this:

enter image description here

But this is what I'm getting on Windows 7:

enter image description here

So what is the way to tell if that middle character is not going to be rendered?


PS. I also attempted to use badly documented Uniscribe, and a modified version of this tutorial as an example. But no matter what I did, it failed to produce a discernible outcome between Win10 and Win7. If it helps to answer this question, here's the code I experimented with:

//Call from WM_PAINT handler ('hdc', 'pStr' and 'rcClient' are the handler's locals)
std::wstring str;               //Receives the debugging dump produced by test02()
test02(hdc, pStr, str);

//Print the dump on a single line starting at y = 200.
//DrawText takes an 'int' character count, so the size_t length is narrowed explicitly.
RECT rc0 = {0, 200, rcClient.right, rcClient.bottom};
::DrawText(hdc, str.c_str(), static_cast<int>(str.size()), &rc0, DT_NOPREFIX | DT_SINGLELINE);

and then:

void test02(HDC hDc, LPCTSTR pStr, std::wstring& str)
{
    //'str' = receives debugging outcome (needs to be printed on the screen)

    //SOURCE:
    //      https://maxradi.us/documents/uniscribe/

    HRESULT hr;
    SCRIPT_STRING_ANALYSIS ssa = {0};

    int nLn = wcslen(pStr);

    hr = ::ScriptStringAnalyse(hDc,
        pStr,
        nLn,
        1024,
        -1,
        SSA_GLYPHS,
        0, NULL, NULL, NULL, NULL, NULL, &ssa);

    if(SUCCEEDED(hr))
    {
        const SCRIPT_PROPERTIES **g_ppScriptProperties;
        int g_iMaxScript;

        hr = ::ScriptGetProperties(&g_ppScriptProperties, &g_iMaxScript);
        if(SUCCEEDED(hr))
        {
            const int cMaxItems = 20;
            SCRIPT_ITEM si[cMaxItems + 1];
            SCRIPT_ITEM *pItems = si;
            int cItems;                 //Receives number of glyphs

            SCRIPT_CONTROL scrCtrl = {0};
            SCRIPT_STATE scrState = {0};

            hr = ::ScriptItemize(pStr, nLn, cMaxItems, &scrCtrl, &scrState, pItems, &cItems);
            if(SUCCEEDED(hr))
            {
                FormatAdd2(str, L"cItems=%d: ", cItems);

                int nCntGlyphs = nLn * 4;
                WORD* pGlyphs = new WORD[nCntGlyphs];
                WORD* pLogClust = new WORD[nLn];
                SCRIPT_VISATTR* pSVs = new SCRIPT_VISATTR[nCntGlyphs];

                //Go through each run
                for(int i = 0; i < cItems; i++)
                {
                    FormatAdd2(str, L"[%d]:", i);

                    SCRIPT_CACHE sc = NULL;
                    int nCntGlyphsWrtn = 0;
                    int iPos = pItems[i].iCharPos;
                    const WCHAR* pP = &pStr[iPos];
                    int cChars = i + 1 < cItems ? pItems[i + 1].iCharPos - iPos : nLn - iPos;

                    hr = ::ScriptShape(hDc, &sc, pP, cChars,
                        nCntGlyphs, &pItems[i].a, pGlyphs, pLogClust, pSVs, &nCntGlyphsWrtn);

                    if(SUCCEEDED(hr))
                    {
                        std::wstring strGlyphs;
                        for(int g = 0; g < nCntGlyphsWrtn; g++)
                        {
                            FormatAdd2(strGlyphs, L"%02X,", pGlyphs[g]);
                        }

                        std::wstring strLogClust;
                        for(int w = 0; w < cChars; w++)
                        {
                            FormatAdd2(strLogClust, L"%02X,", pLogClust[w]);
                        }

                        std::wstring strSVs;
                        for(int g = 0; g < nCntGlyphsWrtn; g++)
                        {
                            FormatAdd2(strSVs, L"%02X,", pSVs[g]);
                        }

                        FormatAdd2(str, L"c=%d {G:%s LC:%s SV:%s} ", nCntGlyphsWrtn, strGlyphs.c_str(), strLogClust.c_str(), strSVs.c_str());


                        int* pAdvances = new int[nCntGlyphsWrtn];
                        GOFFSET* pOffsets = new GOFFSET[nCntGlyphsWrtn];
                        ABC abc = {0};

                        hr = ::ScriptPlace(hDc, &sc, pGlyphs, nCntGlyphsWrtn, pSVs, &pItems[i].a, pAdvances, pOffsets, &abc);
                        if(SUCCEEDED(hr))
                        {
                            std::wstring strAdvs;
                            for(int g = 0; g < nCntGlyphsWrtn; g++)
                            {
                                FormatAdd2(strAdvs, L"%02X,", pAdvances[g]);
                            }

                            std::wstring strOffs;
                            for(int g = 0; g < nCntGlyphsWrtn; g++)
                            {
                                FormatAdd2(strOffs, L"u=%02X v=%02X,", pOffsets[g].du, pOffsets[g].dv);
                            }


                            FormatAdd2(str, L"{a=%d,b=%d,c=%d} {A:%s OF:%s}", abc.abcA, abc.abcB, abc.abcC, strAdvs.c_str(), strOffs.c_str());
                        }


                        delete[] pAdvances;
                        delete[] pOffsets;

                    }



                    //Clear cache
                    hr = ::ScriptFreeCache(&sc);
                    assert(SUCCEEDED(hr));
                }

                delete[] pSVs;
                delete[] pGlyphs;
                delete[] pLogClust;
            }
        }

        hr = ::ScriptStringFree(&ssa);
        assert(SUCCEEDED(hr));
    }
}

//Appends printf-style formatted text to 'str'.
//
//  str       = string to append to
//  pszFormat = printf-style format string (nothing is appended if NULL)
//  ...       = arguments referenced by 'pszFormat'
//
//RETURN: reference to 'str', left unchanged if formatting fails
std::wstring& FormatAdd2(std::wstring& str, LPCTSTR pszFormat, ...)
{
    if(!pszFormat)
        return str;

    va_list argList;

    //First pass: measure. The va_list is consumed by this call, so it is
    //restarted below instead of being reused.
    va_start(argList, pszFormat);
    const int nLen = _vsctprintf(pszFormat, argList);
    va_end(argList);

    //Negative = formatting error; zero = nothing to append
    if(nLen <= 0)
        return str;

    //One extra char for last null. The buffer is a string object so that it is
    //released even if the append below throws.
    std::wstring buff(static_cast<size_t>(nLen) + 1, L'\0');

    //Second pass: format into the buffer
    va_start(argList, pszFormat);
    const int nWritten = _vstprintf_s(&buff[0], buff.size(), pszFormat, argList);
    va_end(argList);

    //As before, only the text up to the first null is appended
    if(nWritten > 0)
        str.append(buff.c_str());

    return str;
}

EDIT: I was able to create a demo GUI app that demonstrates the solution suggested by Barmak Shemirani below.


Solution

  • 😉 character is actually not supported in Windows 10 Arial font. Windows 10 uses "Segoe UI Emoji" as fallback font for that particular code point.

    So first we have to figure out if a fallback font is used. Then check the glyph index to see if it is the tofu character (usually shown as a square sign ▯)

    We can use a metafile to find out whether font substitution is used. Select that font into the HDC.

    Use ScriptGetFontProperties to find the values for unsupported glyphs.

    Use GetCharacterPlacement to find the glyph indices for the string. If a glyph index matches one of the unsupported glyphs, then the code point is being printed as tofu ▯.


    Edit:

    If you try to print Chinese character etc. then you have to choose the appropriate font (SimSun for Chinese)

    This part is done by IMLangFontLink. It's a different type of font substitution. The example below will test for single code point (it can be expanded to handle a string).

    If Segoe UI font is selected, then for the Chinese character 请, it will switch Segoe UI to SimSun.

    For emojis, it will switch Segoe UI to Segoe UI Emoji

    See also this article in oldnewthing. Note the article in OldNewThing does not handle Emojis, it just lets TextOut handle it (which is handled correctly in Windows 10 so the result appears okay)


    #define _CRT_SECURE_NO_WARNINGS
    #include <iostream>
    #include <windows.h>
    #include <usp10.h>
    #include <AtlBase.h>
    #include <AtlCom.h>
    #include <mlang.h>
    
    #pragma comment(lib, "Usp10.lib")
    
    //Enhanced-metafile enumeration callback.
    //For every EMR_EXTCREATEFONTINDIRECTW record it copies the record's LOGFONT
    //into the LOGFONT that the 'logfont' parameter points to; all other records
    //are ignored. Returns 1 for every record (non-zero keeps the enumeration
    //going per the EnumEnhMetaFile contract).
    int CALLBACK metafileproc(HDC, HANDLETABLE*, const ENHMETARECORD *record,
        int, LPARAM logfont)
    {
        //Not a font-creation record: nothing to capture
        if(record->iType != EMR_EXTCREATEFONTINDIRECTW)
            return 1;

        const auto *font_record = (const EMREXTCREATEFONTINDIRECTW*)record;
        auto *captured = (LOGFONT*)logfont;
        *captured = font_record->elfw.elfLogFont;

        return 1;
    }
    
    //Finds the font Uniscribe picks for 'str' when 'hfont_test' is selected.
    //The string is "drawn" into an in-memory enhanced metafile with font fallback
    //and font linking enabled; the metafile is then enumerated with metafileproc,
    //which captures the LOGFONT of the font-creation records it contains.
    //
    //  str        = null-terminated string to test
    //  hfont_test = font the caller wants to use
    //
    //RETURN: a new font created from the captured LOGFONT. The caller owns it
    //        and must release it with DeleteObject. If nothing could be captured
    //        the font is created from a zeroed LOGFONT.
    HFONT GetFallbackFont(const wchar_t *str, HFONT hfont_test)
    {
        LOGFONT logfont = { 0 };

        //use metafile to find the fallback font
        auto metafile_hdc = CreateEnhMetaFile(NULL, NULL, NULL, NULL);
        if(metafile_hdc)
        {
            auto metafile_oldfont = SelectObject(metafile_hdc, hfont_test);

            //Initialized so that a failed analysis never leaves a garbage handle
            SCRIPT_STRING_ANALYSIS ssa = NULL;
            HRESULT hr = ScriptStringAnalyse(metafile_hdc, str, static_cast<int>(wcslen(str)), 0, -1,
                SSA_METAFILE | SSA_FALLBACK | SSA_GLYPHS | SSA_LINK,
                0, NULL, NULL, NULL, NULL, NULL, &ssa);
            if(SUCCEEDED(hr))
            {
                //Only output/free an analysis that was actually produced
                ScriptStringOut(ssa, 0, 0, 0, NULL, 0, 0, FALSE);
                ScriptStringFree(&ssa);
            }

            SelectObject(metafile_hdc, metafile_oldfont);

            //Closing the metafile DC also releases it, whether or not a handle comes back
            auto hmetafile = CloseEnhMetaFile(metafile_hdc);
            if(hmetafile)
            {
                EnumEnhMetaFile(0, hmetafile, metafileproc, &logfont, NULL);
                DeleteEnhMetaFile(hmetafile);
            }
        }

        wprintf(L"Selecting fallback font: %s\n", logfont.lfFaceName);
        return CreateFontIndirect(&logfont);
    }
    
    //IsTofuError is for testing emojis
    //It accepts a Unicode string
    //
    //Selects the fallback font reported by GetFallbackFont into 'hdc', reads that
    //font's blank/default/invalid glyph indices, then fetches the glyph indices of
    //'str' and reports whether any of them is one of those special glyphs.
    //
    //  hdc        = device context used for the test (its font is restored on exit)
    //  hfont_test = font the caller wants to use
    //  str        = null-terminated string to test (should not contain spaces,
    //               because a space maps to the blank glyph)
    //
    //RETURN: true if at least one glyph is the blank, default or invalid glyph,
    //        or if the glyph indices could not be obtained at all.
    bool IsTofuError(HDC hdc, HFONT hfont_test, const wchar_t *str)
    {
        if(wcsstr(str, L" "))
        {
            wprintf(L"*** cannot test strings containing blank space\n");
        }

        auto hfont = GetFallbackFont(str, hfont_test);
        auto oldfont = SelectObject(hdc, hfont);

        //find the characters not supported in this font
        //note, blank space is blank, unsupported fonts can be blank also
        SCRIPT_CACHE sc = NULL;
        SCRIPT_FONTPROPERTIES fp = { sizeof(fp) };
        ScriptGetFontProperties(hdc, &sc, &fp);
        ScriptFreeCache(&sc);
        wprintf(L"SCRIPT_FONTPROPERTIES:\n");
        wprintf(L" Blank: %d, Default: %d, Invalid: %d\n",
            fp.wgBlank, fp.wgDefault, fp.wgInvalid);

        // Get glyph indices for the string
        const size_t len = wcslen(str);
        bool istofu = false;
        auto glyph_memory = (wchar_t*)calloc(len + 1, sizeof(wchar_t));
        if(!glyph_memory)
        {
            //Cannot inspect the glyphs: report the string as not verified
            istofu = true;
        }
        else
        {
            GCP_RESULTS gcp_results = { sizeof(GCP_RESULTS) };
            gcp_results.nGlyphs = static_cast<UINT>(len);
            gcp_results.lpGlyphs = glyph_memory;
            const DWORD placed = GetCharacterPlacement(hdc, str, static_cast<int>(len), 0,
                &gcp_results, GCP_GLYPHSHAPE);

            wprintf(L"Glyphs:");
            if(placed == 0 && len > 0)
            {
                //The call failed, so the buffer holds no real glyph indices
                istofu = true;
            }
            else
            {
                //check the characters against wgBlank...
                for(UINT i = 0; i < gcp_results.nGlyphs; i++)
                {
                    wchar_t n = gcp_results.lpGlyphs[i];
                    wprintf(L"%d,", (int)n);
                    if(n == fp.wgBlank || n == fp.wgInvalid || n == fp.wgDefault)
                        istofu = true;
                }
            }
            wprintf(L"\n");

            free(glyph_memory);
        }

        //Put the caller's font back before destroying the temporary one
        SelectObject(hdc, oldfont);
        DeleteObject(hfont);

        if (istofu)
            wprintf(L"Tofu error\n\n");

        return istofu;
    }
    
    //get_font_link checks if there is font substitution,
    //this usually applies to Asian fonts
    //Note, this function doesn't accept a unicode string
    //it only takes a single code point. You can improve it to accept strings
    //
    //  single_codepoint = null-terminated string holding the code point to test
    //  hdc              = device context used for the mapping; on success the
    //                     mapped font is left selected into it
    //  hfont_src        = font the caller wants to use
    //  hfont_dst        = receives the mapped font on success (the caller in this
    //                     file releases it with IMLangFontLink::ReleaseFont)
    //  ifont            = font-link interface used for MapFont
    //  icodepages       = code-page interface used for GetStrCodePages
    //
    //RETURN: true if a font was mapped, false if either MLang call failed
    bool get_font_link(const wchar_t *single_codepoint,
        HDC hdc,
        HFONT &hfont_src,
        HFONT &hfont_dst,
        CComPtr<IMLangFontLink> &ifont,
        CComPtr<IMLangCodePages> &icodepages)
    {
        //GetStrCodePages reports ONE code-page bit mask plus the number of
        //characters it processed; both are plain output values.
        DWORD codepages = 0;
        long chars_processed = 0;
        if(FAILED(icodepages->GetStrCodePages(single_codepoint,
            static_cast<long>(wcslen(single_codepoint)),
            0, &codepages, &chars_processed)))
            return false;

        if(FAILED(ifont->MapFont(hdc, codepages, hfont_src, &hfont_dst)))
            return false;

        //Select the mapped font so that its face name can be queried and printed
        SelectObject(hdc, hfont_dst);
        wchar_t buf[100] = { 0 };   //stays an empty string if GetTextFace fails
        GetTextFace(hdc, _countof(buf), buf);
        wprintf(L"get_font_link:\nSelecting a different font: %s\n", buf);
        return true;
    }
    
    int main()
    {
        CoInitialize(NULL);
    
        {
            CComPtr<IMultiLanguage> imultilang;
            CComPtr<IMLangFontLink> ifont;
            CComPtr<IMLangCodePages> icodepages;
            if(FAILED(imultilang.CoCreateInstance(CLSID_CMultiLanguage))) return 0;
            if(FAILED(imultilang->QueryInterface(&ifont))) return 0;
            if(FAILED(imultilang->QueryInterface(&icodepages))) return 0;
    
            //const wchar_t *single_codepoint = L"a";
            //const wchar_t *single_codepoint = L"请";
            const wchar_t *single_codepoint = L"😉";
    
            auto hdc = GetDC(0);
            auto memdc = CreateCompatibleDC(hdc);
            auto hbitmap = CreateCompatibleBitmap(hdc, 1, 1);
            auto oldbmp = SelectObject(memdc, hbitmap);
            auto hfont_src = CreateFont(10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, L"Segoe UI");
            auto oldfont = SelectObject(hdc, hfont_src);
    
            HFONT hfont_dst = NULL;
    
            if(IsTofuError(hdc, hfont_src, single_codepoint))
            {
                if(!get_font_link(
                    single_codepoint, memdc, hfont_src, hfont_dst, ifont, icodepages))
                    wprintf(L"Can't find a substitution!\n");
            }
    
            SelectObject(memdc, oldbmp);
            SelectObject(memdc, oldfont);
            DeleteObject(hbitmap);
            DeleteDC(memdc);
            ReleaseDC(0, hdc);
            DeleteObject(hfont_src);
    
            if(ifont && hfont_dst)
                ifont->ReleaseFont(hfont_dst);
        }
    
        CoUninitialize();
    
        return 0;
    }
    

    Output:

    IsTofu is false for Windows 10.

    It will be true for some older Windows versions. But this is not tested in WinXP

    Using GetUniscribeFallbackFont from this link

    Note, Windows documentation describes GetCharacterPlacement as obsolete, it recommends using Uniscribe functions. But I don't know what replacement to use for it here.