Hi,
For the last week I have been trying to do conversions between UTF-8, multibyte, wide-character and ASCII strings. I finally managed to write a working program and thought it might help someone, so I am posting it here. If there are any errors, please point them out, and if you have any doubts, please ask.
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include "locale.h"
#include "string.h"
/* Capacity, in wchar_t elements, of the wide-character buffer used by main(). */
#define WCMAXLEN 20
/* Capacity, in bytes, of the multibyte (char) buffer used by main(). */
#define MBMAXLEN 10
/*
 * Consume the rest of the current stdin line, up to and including the
 * newline, or up to EOF.
 */
static void discard_stdin_line(void)
{
    int iCh;

    do {
        iCh = getchar();
    } while (iCh != '\n' && iCh != EOF);
}

/*
 * Read one line from stdin into acBuf (capacity uCap bytes, uCap >= 1).
 * The trailing newline is removed; input that does not fit in the buffer
 * is discarded so it cannot leak into the next prompt.
 *
 * Returns the number of bytes stored, or (size_t)-1 on EOF / read error.
 */
static size_t read_stdin_line(char *acBuf, size_t uCap)
{
    size_t uLen;

    if (fgets(acBuf, (int)uCap, stdin) == NULL) {
        acBuf[0] = '\0';
        return (size_t)-1;
    }
    uLen = strlen(acBuf);
    if (uLen > 0 && acBuf[uLen - 1] == '\n') {
        acBuf[--uLen] = '\0';
    } else {
        /* Line was longer than the buffer (or ended at EOF). */
        discard_stdin_line();
    }
    return uLen;
}

/*
 * Encode one Unicode code point as UTF-8.
 *
 *   U+0000  .. U+007F   -> 0xxxxxxx
 *   U+0080  .. U+07FF   -> 110xxxxx 10xxxxxx
 *   U+0800  .. U+FFFF   -> 1110xxxx 10xxxxxx 10xxxxxx
 *   U+10000 .. U+10FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * The lead byte is written first, followed by the continuation bytes.
 * Returns the number of bytes written to aucOut (1-4), or 0 when the value
 * is not encodable (a UTF-16 surrogate or a value above U+10FFFF).
 */
static size_t encode_utf8(unsigned long ulCodePoint, unsigned char aucOut[4])
{
    if (ulCodePoint < 0x80UL) {
        aucOut[0] = (unsigned char)ulCodePoint;
        return 1;
    }
    if (ulCodePoint < 0x800UL) {
        aucOut[0] = (unsigned char)(0xC0UL | (ulCodePoint >> 6));
        aucOut[1] = (unsigned char)(0x80UL | (ulCodePoint & 0x3FUL));
        return 2;
    }
    if (ulCodePoint < 0x10000UL) {
        if (ulCodePoint >= 0xD800UL && ulCodePoint <= 0xDFFFUL) {
            return 0;
        }
        aucOut[0] = (unsigned char)(0xE0UL | (ulCodePoint >> 12));
        aucOut[1] = (unsigned char)(0x80UL | ((ulCodePoint >> 6) & 0x3FUL));
        aucOut[2] = (unsigned char)(0x80UL | (ulCodePoint & 0x3FUL));
        return 3;
    }
    if (ulCodePoint <= 0x10FFFFUL) {
        aucOut[0] = (unsigned char)(0xF0UL | (ulCodePoint >> 18));
        aucOut[1] = (unsigned char)(0x80UL | ((ulCodePoint >> 12) & 0x3FUL));
        aucOut[2] = (unsigned char)(0x80UL | ((ulCodePoint >> 6) & 0x3FUL));
        aucOut[3] = (unsigned char)(0x80UL | (ulCodePoint & 0x3FUL));
        return 4;
    }
    return 0;
}

/*
 * Interactive demo of conversions between multibyte, wide-character and
 * UTF-8 text.  Shows a menu in a loop until the user enters 0 (or stdin
 * reaches EOF):
 *
 *   1  convert a fixed multibyte sample to wide characters (mbstowcs)
 *   2  read a line, convert it to wide characters, append it to wide.txt
 *   3  read a line of bytes, encode each byte value as UTF-8, append the
 *      result to utf.txt and echo it
 *   4  read wide.txt line by line and print each line encoded as UTF-8
 *   5  read utf.txt line by line as wide characters and print them
 *
 * stdin and stdout are only ever used with byte-oriented functions; wide
 * I/O is confined to the files opened for that purpose, because a stream's
 * orientation cannot be mixed.
 *
 * Returns 0.
 */
int main(void) {
    /* Choice of the user */
    int iChoice = 0;
    /* Number of items converted by scanf */
    int iScanned = 0;
    /* MB and WC string */
    char acString[MBMAXLEN] = "";
    wchar_t awArr[WCMAXLEN] = L"";
    /* One encoded UTF-8 sequence */
    unsigned char aucUtf8[4];
    /* File Pointer */
    FILE *pFile = NULL;
    /* Lengths and loop index */
    size_t uLength = 0;
    size_t uConverted = 0;
    size_t uBytes = 0;
    size_t uIndex = 0;

    do {
        /* Display the menu ("cls" clears the screen through the shell) */
        system("cls");
        printf("\n 1. Japanese");
        printf("\n 2. Ascii to Wide Character");
        printf("\n 3. Ascii to UTF-8");
        printf("\n 4. Wide Character to UTF-8");
        printf("\n 5. UTF-8 to Wide Character");
        printf("\n Enter your choice [0 to quit]: ");

        /* Get the input */
        iScanned = scanf("%d", &iChoice);
        if (iScanned == EOF) {
            /* No more input: leave instead of spinning on the menu. */
            break;
        }
        if (iScanned != 1) {
            /* Not a number: treat as an invalid choice. */
            iChoice = -1;
        }
        /* Drop the rest of the line so the newline typed after the number
           is not taken as the answer to the next prompt. */
        discard_stdin_line();

        switch (iChoice) {
        case 1: {
            /* Multibyte to Wide Character */
            /* NOTE(review): these bytes are SO, 'A', 'q', SI; every one is
               below 0x80, so in a UTF-8 locale they convert to four
               single-byte characters.  Confirm the intended sample. */
            static const char acSample[] = "\x0e\x41\x71\x0f";

            /* Set the locale and check the return value for error */
            if (setlocale(LC_ALL, "ja_JP.utf-8") == NULL) {
                printf("\n Locale failed");
            }
            uLength = strlen(acSample);
            /* Convert to wide char, leaving room for the terminator */
            uConverted = mbstowcs(awArr, acSample, WCMAXLEN - 1);
            if (uConverted == (size_t)-1) {
                printf("\n Conversion failed");
                break;
            }
            awArr[uConverted] = L'\0';
            printf("\n Wide character string: %ls", awArr);
            printf("\n Length : %d", (int)uLength);
            break;
        }
        case 2: {
            /* ASCII to Wide Character */
            printf("\n Enter a String [size <= 5 chars] : ");
            /* Read bytes and convert, rather than wide-reading from stdin:
               this bounds the input and keeps stdin byte-oriented. */
            if (read_stdin_line(acString, sizeof acString) == (size_t)-1) {
                printf("\n Error in reading...");
                break;
            }
            uConverted = mbstowcs(awArr, acString, WCMAXLEN - 1);
            if (uConverted == (size_t)-1) {
                printf("\n Conversion failed");
                break;
            }
            awArr[uConverted] = L'\0';
            printf("\n Wide Character String : %ls", awArr);
            /* Write to file */
            pFile = fopen("wide.txt", "a+");
            if (pFile == NULL) {
                printf("\n Error in opening...");
                break;
            }
            /* Wide character file operation */
            fwprintf(pFile, L"%ls\n", awArr);
            fclose(pFile);
            break;
        }
        case 3: {
            /* ASCII to UTF8 */
            printf("\n Enter a String [size <= 5 chars] : ");
            uLength = read_stdin_line(acString, sizeof acString);
            if (uLength == (size_t)-1) {
                printf("\n Error in reading...");
                break;
            }
            pFile = fopen("utf.txt", "a+");
            if (pFile == NULL) {
                printf("\n Error in opening...");
                break;
            }
            printf("\n UTF8 String : ");
            for (uIndex = 0; uIndex < uLength; uIndex++) {
                /* Go through unsigned char so bytes 0x80-0xFF are seen as
                   128-255 (plain char may be signed) and take the 2-byte
                   form; values below 128 stay a single byte. */
                uBytes = encode_utf8((unsigned char)acString[uIndex], aucUtf8);
                fwrite(aucUtf8, 1, uBytes, pFile);
                fwrite(aucUtf8, 1, uBytes, stdout);
            }
            /* End the record so the file can be read back line by line. */
            fputc('\n', pFile);
            putchar('\n');
            fclose(pFile);
            break;
        }
        case 4: {
            /* Wide Character to UTF8 */
            /* Select the locale before the file is read so that fgetws
               decodes the file's bytes under it. */
            if (setlocale(LC_ALL, "en_US.utf-8") == NULL) {
                printf("\n Locale failed");
            }
            pFile = fopen("wide.txt", "r");
            if (pFile == NULL) {
                printf("\n Error in opening...");
                break;
            }
            /* Wide character file operation */
            while (fgetws(awArr, WCMAXLEN, pFile) != NULL) {
                printf("\n Wide character string: %ls", awArr);
                printf("\n UTF8 String : ");
                /* Encode each wide character up to the end of the line.
                   Assumes wchar_t values are Unicode code points. */
                for (uIndex = 0;
                     awArr[uIndex] != L'\0' && awArr[uIndex] != L'\n';
                     uIndex++) {
                    uBytes = encode_utf8((unsigned long)awArr[uIndex], aucUtf8);
                    if (uBytes == 0) {
                        /* Not encodable as UTF-8 */
                        putchar('?');
                        continue;
                    }
                    fwrite(aucUtf8, 1, uBytes, stdout);
                }
            }
            if (ferror(pFile)) {
                printf("\n Error in reading...");
            }
            fclose(pFile);
            break;
        }
        case 5: {
            /* UTF8 to Wide Character */
            /* fgetws decodes according to the current locale, so a UTF-8
               locale is needed to read multi-byte sequences. */
            if (setlocale(LC_ALL, "en_US.utf-8") == NULL) {
                printf("\n Locale failed");
            }
            pFile = fopen("utf.txt", "r");
            if (pFile == NULL) {
                printf("\n Error in opening...");
                break;
            }
            /* Reading directly as wide character */
            while (fgetws(awArr, WCMAXLEN, pFile) != NULL) {
                printf("\n Wide Character : %ls", awArr);
            }
            if (ferror(pFile)) {
                printf("\n Error in reading...");
            }
            fclose(pFile);
            break;
        }
        default:
            break;
        }

        printf("\n\n Press any key to continue...");
        /* Wait for Enter and swallow anything typed before it. */
        discard_stdin_line();
    } while (iChoice != 0);

    return 0;
}