Tags: c, char, uppercase, lowercase, turkish

How can I convert Turkish letters between uppercase and lowercase?


I wrote a C function that swaps the case of a letter (uppercase becomes lowercase and vice versa).

#include <stdio.h>
#include <stdlib.h>

//İi Iı Ğğ Şş Çç Üü Öö
/*
 * Swap the case of a single ASCII letter.
 *
 * letter: the character to convert.
 * Returns the lowercase form of an uppercase letter, the uppercase form of
 * a lowercase letter, and -1 for any character outside 'A'..'Z' / 'a'..'z'.
 */
char toUpLow(char letter)
{
    const int is_upper = (letter >= 'A' && letter <= 'Z');
    const int is_lower = (letter >= 'a' && letter <= 'z');

    // Anything that is not an ASCII letter is reported with the -1 sentinel.
    if (!is_upper && !is_lower) {
        return -1;
    }

    // Distance between the lowercase and uppercase ranges.
    const int case_offset = 'a' - 'A';

    return is_upper ? letter + case_offset : letter - case_offset;
}

/*
 * Read one character from standard input and print the result of
 * toUpLow() for it.
 *
 * Returns 0 on success, EXIT_FAILURE when no character could be read.
 */
int main(void)
{
    char myChar;

    printf("Enter a character: ");

    // scanf returns the number of items assigned; anything other than 1
    // (EOF or a read error) means myChar was never written and must not
    // be read.
    if (scanf("%c", &myChar) != 1) {
        return EXIT_FAILURE;
    }

    printf("%c", toUpLow(myChar));

    return 0;
}

I want to add Turkish letters.

/*
 * Attempted extension of toUpLow to the Turkish letters İ/i, I/ı and Ğ/ğ.
 *
 * The first six branches print the converted letter with printf instead of
 * returning it; only the plain ASCII branches and the final else return a
 * value (the swapped-case letter, or -1 for anything else).
 *
 * NOTE(review): 'İ', 'ı', 'Ğ' and 'ğ' are non-ASCII characters. If this
 * source file is saved in a multi-byte encoding such as UTF-8, each of
 * these constants holds more than one byte, which is presumably what
 * triggers the -Wmultichar warning; a comparison against a single char is
 * then unlikely to ever be true -- confirm the file encoding.
 */
char toUpLow(char letter)
{
    if (letter == 'İ') {
        // Prints the result but does not return: control reaches the end of
        // a non-void function, so a caller must not use the return value.
        printf("i");
    }
    // 'i' and 'I' are tested before the generic ASCII ranges below, so they
    // never reach the plain a-z / A-Z branches.
    else if (letter == 'i')
    {
        printf("İ");
    }
    else if (letter == 'I')
    {
        printf("ı");
    }
    else if (letter == 'ı')
    {
        printf("I");
    }
    else if (letter == 'Ğ')
    {
        printf("ğ");
    }
    else if (letter == 'ğ')
    {
        printf("Ğ");
    }
    // Remaining ASCII uppercase letters: shift into the lowercase range.
    else if (letter >= 'A' && letter <= 'Z') {
        return letter - 'A' + 'a';
    }
    // Remaining ASCII lowercase letters: shift into the uppercase range.
    else if (letter >= 'a' && letter <= 'z') {
        return letter - 'a' + 'A';
    }
    // Not a recognised letter: -1 is used as the sentinel.
    else {
        return -1;
    }
}

I tried to add Turkish letters with if / else and I got this error:

uplowfunction.c:22:24: warning: multi-character character constant [-Wmultichar]

Is there a way to do this?


Solution

  • The error message indicates that you are not using a single-byte encoding for the Turkish letters, such as ISO 8859-9, Windows code page 1254 or MS-DOS code page 857. You are probably using the UTF-8 encoding of Unicode, in which non-ASCII characters are represented as sequences of 2 to 4 bytes.

    Non-ASCII characters cannot be used in character constants, or more precisely should not be used in character constants, as they would be parsed as multi-character constants, which are error-prone and non-portable.

    To convert case in UTF-8 strings, you should either use wide characters or convert full character strings instead of single characters. Beware that the length of the converted string may be different from the length of the original string: strlen("İ") != strlen("i"), because in UTF-8 "İ" takes 2 bytes while "i" takes 1.

    Here is a simplistic implementation:

    #include <stdlib.h>
    #include <string.h>
    
    /*
     * Turkish case pairs as UTF-8 strings, stored as adjacent
     * (uppercase, lowercase) entries so that entry k is paired with
     * entry k ^ 1. No entry is a prefix of another, so a prefix match
     * against the input is unambiguous.
     */
    static const char * const tcase[] = {
        "İ", "i",
        "I", "ı",
        "Ğ", "ğ",
        "Ş", "ş",
        "Ç", "ç",
        "Ü", "ü",
        "Ö", "ö",
    };

    /*
     * Swap the case of every letter in a string using Turkish rules.
     *
     * ASCII letters other than 'I' and 'i' are swapped arithmetically; the
     * letters listed in tcase are swapped through the table (so I <-> ı and
     * İ <-> i); every other byte is copied unchanged.
     *
     * s: NUL-terminated input string; not modified.
     * Returns a newly allocated NUL-terminated string that the caller must
     * free(), or NULL if s is NULL, the size computation would overflow, or
     * the allocation fails.
     */
    char *conver_case_turkish(const char *s) {
        const size_t ncase = sizeof(tcase) / sizeof(*tcase);
        size_t len, i = 0, j = 0;
        char *dest;

        if (s == NULL) {
            return NULL;
        }
        len = strlen(s);

        /*
         * Worst case is one input byte ("i") becoming two output bytes
         * ("İ"), so 2 * len + 1 bytes always suffice. Refuse lengths for
         * which that expression would wrap around.
         */
        if (len > ((size_t)-1 - 1) / 2) {
            return NULL;
        }
        dest = malloc(len * 2 + 1);
        if (dest == NULL) {
            return NULL;
        }

        while (i < len) {
            const char c = s[i];
            size_t k;

            /* 'I' and 'i' are excluded here: their Turkish partners are
             * not ASCII and come from the table below. */
            if (c >= 'A' && c <= 'Z' && c != 'I') {
                dest[j++] = (char)(c - 'A' + 'a');
                i++;
                continue;
            }
            if (c >= 'a' && c <= 'z' && c != 'i') {
                dest[j++] = (char)(c - 'a' + 'A');
                i++;
                continue;
            }

            for (k = 0; k < ncase; k++) {
                const size_t len1 = strlen(tcase[k]);

                /* strncmp stops at the input's terminator, so this never
                 * reads past the end of s. */
                if (strncmp(s + i, tcase[k], len1) == 0) {
                    const size_t len2 = strlen(tcase[k ^ 1]);

                    memcpy(dest + j, tcase[k ^ 1], len2);
                    i += len1;
                    j += len2;
                    break;
                }
            }
            if (k == ncase) {
                /* Not a known letter: copy the byte through unchanged. */
                dest[j++] = c;
                i++;
            }
        }
        dest[j] = '\0';
        return dest;
    }