Tags: c, xcode, io, widechar, widestring

Proper manipulation of wide char/string in C


OS X Yosemite 10.10.5 XCode 7.2

I've been reading and experimenting all day long with wide characters/strings in C, and I still can't make it work.

I'm trying to read a file composed only of wide characters like the following:

んわらやま    (Japanese)

I want to read only one character at a time and write it immediately to another file.

/*
 * Entry point: opens argv[1] for reading and argv[2] for writing, asks for
 * both streams to be wide-oriented, hands them to fileManipulator(), and
 * closes both files before returning 0.
 *
 * Exits with status 1 if either file cannot be opened.
 *
 * NOTE(review): argc is never checked, so argv[1]/argv[2] are used even when
 * the arguments were not supplied.
 * NOTE(review): setlocale() is never called here; the solution further down
 * identifies that call as crucial for reading the multibyte input correctly.
 */
int main(int argc, const char * argv[])
{
    FILE    *source, *dest;

    source = fopen( argv[1], "r");
    if (source == NULL) {
        printf ("could not open source file \n");
        exit (1);
    }

//  if [dest] does not exist it is created
    dest = fopen( argv[2], "w+");
    if (dest == NULL) {
        // release the already-opened input before bailing out
        fclose(source);
    
        printf ("could not open dest file \n");
        exit (1);
    }
    // a positive mode asks fwide() to make the stream wide-oriented
    fwide(source, 1);
    fwide(dest, 1);

    fileManipulator(source, dest);

    fclose(source);
    fclose(dest);

    return 0;
    }

/*
 * Reads wide characters from source one at a time and passes each one,
 * together with dest, to manipulateToken(). Stops as soon as getwc()
 * returns WEOF.
 *
 * NOTE(review): getwc() returns WEOF for errors as well as end-of-file;
 * the loop does not tell the two apart.
 */
void fileManipulator(FILE* source, FILE* dest)
{
    wint_t token;

    while ( WEOF != (token = getwc(source))) {
        manipulateToken(token, dest);
    }
}


/*
 * Writes one decorated line to dest: the prefix "- ", then the wide
 * character token, then the suffix " -\n".
 *
 * The result of each write is tested, but the failure branches are empty
 * placeholders.
 *
 * NOTE(review): the prefix and suffix are narrow strings written with
 * fputs(), while the token is written with the wide function fputwc() on the
 * same stream; the solution further down recommends fputws() with wide
 * string literals instead.
 */
void manipulateToken(wint_t token, FILE* dest)
{
    char* pre = "- ";
    char* post= " -\n";

    if ( EOF == fputs(pre, dest))
    {
//      error handling
    }
    if ( WEOF == fputwc(token, dest))
    {
//      error handling
    }
    if ( EOF == fputs(post, dest))
    {
//      error handling
    }
}

Here is the output:

- „ -
- Ç -
- ì -
- „ -
- Ç -
- è -
- „ -
- Ç -
- â -
- „ -
- Ç -
- Ñ -
- „ -
- Å -
- æ -

I understand that my problem is probably about how I read the data, but when I think about alternatives I'm totally stuck.

Can you help me?

PS: Links to further reading on the subject are much appreciated as well. Documentation on the matter is pretty scarce.


The XCode issue

This issue initially made me think that Jonathan Leffler's solution was not working. In fact, the code produces different output depending on whether I run it through XCode (CMD+R) or through the Terminal.

AFAIK the issue must be some kind of attribute/property/setting XCode uses at run time, since hard-coding source and dest parameters still produces the wrong output.

For the sake of clarity I'm providing the exported scheme for my code:

<?xml version="1.0" encoding="UTF-8"?>
<!-- Exported Xcode scheme for the FileManipulator buildable: build, test,
     launch, profile, analyze and archive actions. -->
<Scheme
   LastUpgradeVersion = "0720"
   version = "1.3">
   <BuildAction
      parallelizeBuildables = "YES"
      buildImplicitDependencies = "YES">
      <BuildActionEntries>
         <BuildActionEntry
            buildForTesting = "YES"
            buildForRunning = "YES"
            buildForProfiling = "YES"
            buildForArchiving = "YES"
            buildForAnalyzing = "YES">
            <BuildableReference
               BuildableIdentifier = "primary"
               BlueprintIdentifier = "DA36663A1CCF4F8200615958"
               BuildableName = "FileManipulator"
               BlueprintName = "FileManipulator"
               ReferencedContainer = "container:FileManipulator.xcodeproj">
            </BuildableReference>
         </BuildActionEntry>
      </BuildActionEntries>
   </BuildAction>
   <TestAction
      buildConfiguration = "Debug"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      shouldUseLaunchSchemeArgsEnv = "YES">
      <Testables>
      </Testables>
      <MacroExpansion>
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "DA36663A1CCF4F8200615958"
            BuildableName = "FileManipulator"
            BlueprintName = "FileManipulator"
            ReferencedContainer = "container:FileManipulator.xcodeproj">
         </BuildableReference>
      </MacroExpansion>
      <AdditionalOptions>
      </AdditionalOptions>
   </TestAction>
   <!-- Launch action: Debug configuration, LLDB debugger and launcher,
        AddressSanitizer enabled, no custom working directory. -->
   <LaunchAction
      buildConfiguration = "Debug"
      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
      launchStyle = "0"
      useCustomWorkingDirectory = "NO"
      ignoresPersistentStateOnLaunch = "NO"
      debugDocumentVersioning = "YES"
      enableAddressSanitizer = "YES"
      debugServiceExtension = "internal"
      allowLocationSimulation = "YES">
      <BuildableProductRunnable
         runnableDebuggingMode = "0">
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "DA36663A1CCF4F8200615958"
            BuildableName = "FileManipulator"
            BlueprintName = "FileManipulator"
            ReferencedContainer = "container:FileManipulator.xcodeproj">
         </BuildableReference>
      </BuildableProductRunnable>
      <!-- Two enabled launch arguments: the Source.txt path, then the
           Destination.txt path. -->
      <CommandLineArguments>
         <CommandLineArgument
            argument = "/Users/Paul/TestDirectory/Source.txt"
            isEnabled = "YES">
         </CommandLineArgument>
         <CommandLineArgument
            argument = "/Users/Paul/TestDirectory/Destination.txt"
            isEnabled = "YES">
         </CommandLineArgument>
      </CommandLineArguments>
      <!-- Additional launch options enabled for this scheme. -->
      <AdditionalOptions>
         <AdditionalOption
            key = "NSZombieEnabled"
            value = "YES"
            isEnabled = "YES">
         </AdditionalOption>
         <AdditionalOption
            key = "NSDOLoggingEnabled"
            value = "YES"
            isEnabled = "YES">
         </AdditionalOption>
      </AdditionalOptions>
   </LaunchAction>
   <ProfileAction
      buildConfiguration = "Release"
      shouldUseLaunchSchemeArgsEnv = "YES"
      savedToolIdentifier = ""
      useCustomWorkingDirectory = "NO"
      debugDocumentVersioning = "YES">
      <BuildableProductRunnable
         runnableDebuggingMode = "0">
         <BuildableReference
            BuildableIdentifier = "primary"
            BlueprintIdentifier = "DA36663A1CCF4F8200615958"
            BuildableName = "FileManipulator"
            BlueprintName = "FileManipulator"
            ReferencedContainer = "container:FileManipulator.xcodeproj">
         </BuildableReference>
      </BuildableProductRunnable>
   </ProfileAction>
   <AnalyzeAction
      buildConfiguration = "Debug">
   </AnalyzeAction>
   <ArchiveAction
      buildConfiguration = "Release"
      revealArchiveInOrganizer = "YES">
   </ArchiveAction>
</Scheme>

Solution

  • This code seems to work. You probably shouldn't be using fputs() and narrow strings; you should be using fputws() and wide strings: L"- ". Note the use of setlocale(); that is crucial (try omitting it and see what you get).

    #include <locale.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <wchar.h>
    
    /* Forward declarations of the file-local helpers defined after main(). */
    static void fileManipulator(FILE *source, FILE *dest);
    static void manipulateToken(wint_t token, FILE *dest);
    
    /*
     * Copies the text file named by argv[1] to the file named by argv[2],
     * one wide character at a time, via fileManipulator().
     *
     * Usage: program input output
     *
     * Returns 0 on success. Exits with status 1 when the argument count is
     * wrong, when either file cannot be opened, or when the output file
     * cannot be flushed and closed cleanly.
     */
    int main(int argc, const char *argv[])
    {
        FILE    *source, *dest;
        if (argc != 3)
        {
            fprintf(stderr, "Usage: %s input output\n", argv[0]);
            exit(1);
        }

        /*
         * Adopt the locale from the environment so the wide-character I/O
         * functions know how to decode and encode the multibyte file data.
         * A NULL return means the request could not be honoured; warn, since
         * the conversion will then use the default "C" locale.
         */
        if (setlocale(LC_ALL, "") == NULL)
        {
            fprintf(stderr, "warning: could not set locale from environment\n");
        }

        source = fopen(argv[1], "r");
        if (source == NULL)
        {
            fprintf(stderr, "could not open source file %s\n", argv[1]);
            exit(1);
        }

        dest = fopen(argv[2], "w+");
        if (dest == NULL)
        {
            fclose(source);
            fprintf(stderr, "could not open dest file %s\n", argv[2]);
            exit(1);
        }

        /* A positive mode asks for wide orientation on each stream. */
        fwide(source, 1);
        fwide(dest, 1);

        fileManipulator(source, dest);

        fclose(source);

        /*
         * Output is buffered, so a write failure may only surface when the
         * stream is flushed by fclose(); do not report success in that case.
         */
        if (fclose(dest) != 0)
        {
            fprintf(stderr, "could not finish writing dest file %s\n", argv[2]);
            exit(1);
        }

        return 0;
    }
    
    /*
     * Reads wide characters from source until getwc() returns WEOF and hands
     * each one to manipulateToken() for writing to dest.
     *
     * getwc() returns WEOF both at end-of-file and on a read or encoding
     * error, so after the loop the end-of-file indicator is checked: if it is
     * not set, the input stopped early and the program exits with status 1
     * instead of silently producing truncated output.
     */
    static void fileManipulator(FILE *source, FILE *dest)
    {
        wint_t token;

        while (WEOF != (token = getwc(source)))
        {
            manipulateToken(token, dest);
        }

        if (!feof(source))
        {
            fprintf(stderr, "Failed to read wide character from input\n");
            exit(1);
        }
    }
    
    /*
     * Emits one decorated line to dest: the wide prefix "- ", the wide
     * character token, and the wide suffix " -\n".
     *
     * Any failed write is reported on stderr and terminates the program
     * with exit status 1.
     */
    static void manipulateToken(wint_t token, FILE *dest)
    {
        static const wchar_t prefix[] = L"- ";
        static const wchar_t suffix[] = L" -\n";

        if (fputws(prefix, dest) == EOF)
        {
            fputs("Failed to write prefix string\n", stderr);
            exit(1);
        }

        if (fputwc(token, dest) == WEOF)
        {
            fprintf(stderr, "Failed to write wide character %d\n", (int)token);
            exit(1);
        }

        if (fputws(suffix, dest) == EOF)
        {
            fputs("Failed to write suffix string\n", stderr);
            exit(1);
        }
    }
    

    Given a file, data, containing:

    $ cat data
    んわらやま
    $ odx data
    0x0000: E3 82 93 E3 82 8F E3 82 89 E3 82 84 E3 81 BE 0A   ................
    0x0010:
    $
    

    (You won't have odx because I wrote it, but xxd -g 1 data produces more or less equivalent output.) I ran the program (called x37) like this:

    $ x37 data output
    $ cat output
    - ん -
    - わ -
    - ら -
    - や -
    - ま -
    - 
     -
    $ odx output
    0x0000: 2D 20 E3 82 93 20 2D 0A 2D 20 E3 82 8F 20 2D 0A   - ... -.- ... -.
    0x0010: 2D 20 E3 82 89 20 2D 0A 2D 20 E3 82 84 20 2D 0A   - ... -.- ... -.
    0x0020: 2D 20 E3 81 BE 20 2D 0A 2D 20 0A 20 2D 0A         - ... -.- . -.
    0x002E:
    $
    

    Testing on Mac OS X 10.11.4 with GCC (5.3.0, home-built) and Clang (Apple LLVM version 7.3.0 (clang-703.0.29)).

    Given working code, you can experiment to find which changes are crucial. I'd also create functions to report errors with one-line calls instead of needing to write 3 or 4 lines per error. (Actually 'use' is more appropriate than 'create' — I created such a set of functions long ago, and use them continually.)