I am trying to write this value to a UTF-8 encoded XML text file:
Estudante - Leitura da Bíblia
I am using a lambda function:
auto AddLabel = [](tinyxml2::XMLDocument& rDoc, tinyxml2::XMLElement* pLabels, LPCSTR szElement, CString strValue, int iIndex = -1)
{
tinyxml2::XMLElement* pLabel = rDoc.NewElement(szElement);
if (pLabels != nullptr && pLabel != nullptr)
{
pLabel->SetText(CT2CA(strValue));
if (iIndex != -1)
{
pLabel->SetAttribute("Index", iIndex);
}
pLabels->InsertEndChild(pLabel);
}
};
But I end up with a corrupt character when I open the XML in Notepad++:
This is how I save my XML file:
// Saves rDocXML to the file strFileXML, replacing any existing file.
//
// strFileXML - path of the XML file to write.
// rDocXML    - the tinyxml2 document to save.
//
// Returns true when the document was written and the stream was closed
// without error. On any failure a message box is shown and false is returned.
bool CMeetingScheduleAssistantApp::SaveToXML(CString strFileXML, tinyxml2::XMLDocument& rDocXML)
{
    using namespace tinyxml2;

    // If the file already exists, try to delete it before writing.
    if (PathFileExists(strFileXML) && !::DeleteFile(strFileXML))
    {
        // Unable to delete!
        AfxMessageBox(theApp.GetLastErrorAsString(), MB_OK | MB_ICONINFORMATION);
        return false;
    }

    CString strErrorCode;
    int iErrorNo = 0;
    bool bDisplayError = false;

    // Records a CRT error: the number is captured by the caller at the point
    // of failure, so the message text and the reported number always describe
    // the same error (errno may be modified by later library calls).
    const auto RecordCrtError = [&strErrorCode, &iErrorNo, &bDisplayError](int iCrtError)
    {
        bDisplayError = true;
        iErrorNo = iCrtError;
        _tcserror_s(strErrorCode.GetBufferSetLength(_MAX_PATH), _MAX_PATH, iCrtError);
        strErrorCode.ReleaseBuffer();
    };

    // Now try to create a FILE buffer (allows UNICODE filenames)
    FILE* fStream = nullptr;
    const auto eResult = _tfopen_s(&fStream, strFileXML, _T("w"));
    if (eResult != 0 || fStream == nullptr) // Error
    {
        // _tfopen_s returns the error code itself; fall back to errno only
        // if it reported success but produced no stream.
        RecordCrtError(eResult != 0 ? static_cast<int>(eResult) : errno);
    }
    else // Success
    {
        // Now try to save the XML file
        const XMLError eXML = rDocXML.SaveFile(fStream);

        // Always close the stream, and read errno straight away in case the
        // close failed.
        const bool bCloseFailed = (fclose(fStream) != 0);
        const int iCloseError = errno;

        if (eXML != XMLError::XML_SUCCESS)
        {
            // Error saving: report the tinyxml2 error name and line number.
            bDisplayError = true;
            strErrorCode = rDocXML.ErrorName();
            iErrorNo = rDocXML.ErrorLineNum();
        }
        else if (bCloseFailed)
        {
            // There was a problem closing the stream. We should tell the user
            RecordCrtError(iCloseError);
        }
    }

    if (bDisplayError)
    {
        CString strError;
        strError.Format(IDS_TPL_ERROR_SAVE_XML, strFileXML, strErrorCode, iErrorNo);
        AfxMessageBox(strError, MB_OK | MB_ICONINFORMATION);
        return false;
    }

    return true;
}
Why is this happening? I can confirm that strValue has the correct content as a CString. I can confirm this:
if (strValue.Left(7) == L"Leitura")
{
AfxMessageBox(strValue);
pLabel->SetText(CT2CA(strValue));
AfxMessageBox(CA2CT(pLabel->GetText(), CP_UTF8));
}
When I do the CA2CT call, the result has the corrupt í character. So how can I get around this problem? tinyxml2 expects a UTF-8 encoded string:
void XMLElement::SetText( const char* inText )
{
if ( FirstChild() && FirstChild()->ToText() )
FirstChild()->SetValue( inText );
else {
XMLText* theText = GetDocument()->NewText( inText );
InsertFirstChild( theText );
}
}
I have the same issue for a variety of characters in strings depending on the language. Another affected language is Polish.
How do I write my string as UTF-8 without corrupting it?
The error was simple:
auto AddLabel = [](tinyxml2::XMLDocument& rDoc, tinyxml2::XMLElement* pLabels, LPCSTR szElement, CString strValue, int iIndex = -1)
{
tinyxml2::XMLElement* pLabel = rDoc.NewElement(szElement);
if (pLabels != nullptr && pLabel != nullptr)
{
pLabel->SetText(CT2CA(strValue, CP_UTF8));
if (iIndex != -1)
{
pLabel->SetAttribute("Index", iIndex);
}
pLabels->InsertEndChild(pLabel);
}
};
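I was missing the CP_UTF8 parameter to the CT2CA macro. Without it, the conversion uses the current ANSI code page instead of UTF-8, so any non-ASCII character (such as í) ends up in the wrong encoding for tinyxml2.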