MSDN > フォーラム ホーム > Visual C++ Language > Unable to read and display chinese character VC++ 6.0
質問する質問する
 

回答済みUnable to read and display chinese character VC++ 6.0

  • 2009年11月7日 14:12meghnapramanik ユーザーのメダルユーザーのメダルユーザーのメダルユーザーのメダルユーザーのメダル
     
    Hi All:

    The following is my problem

    1. I have to read a file in binary mode.
    2. We read the file as per the structure i.e.
          struct data
          {
                int length;
                byte* pData;
          }
    3. Now when we read pData in case of english data it is proper but we are unable to read when it is unicode data.
    4. We have used fgetws to read those many bytes into unicode but it returns the ascii representation of the text.

    What we need is to read these bytes which are in chinese, arabic and then display the data in the text box in the language and then store the data in database.

    Can someone help???

    Thanks


     

回答

  • 2009年11月8日 2:20Llelan D. ユーザーのメダルユーザーのメダルユーザーのメダルユーザーのメダルユーザーのメダル
     回答済みコードあり
    Unfortunately, your explanation lacks detail and a compilable example, which makes it difficult to pin down exactly what you are trying to do. You do not specify whether the text in your file is encoded as UTF-8, UTF-16LE, UTF-16BE, or some other codepage.

    If it is in UTF-16LE, _fgetts() (which maps to fgetws() in Unicode mode) will read UTF-16 characters from a file opened in binary mode. The example shows how:

    Data includes the text "Some characters are in 日本語"
    00000000h: 34 00 00 00 53 00 6F 00 6D 00 65 00 20 00 63 00 ; 4...S.o.m.e. .c.
    00000010h: 68 00 61 00 72 00 61 00 63 00 74 00 65 00 72 00 ; h.a.r.a.c.t.e.r.
    00000020h: 73 00 20 00 61 00 72 00 65 00 20 00 69 00 6E 00 ; s. .a.r.e. .i.n.
    00000030h: 20 00 E5 65 2C 67 9E 8A                         ;  .åe,gžŠ
    Code
    #include <tchar.h>
    #include <stdio.h>
    #include <stdlib.h>	/* calloc, free */


    /*
     * Reads a length-prefixed string from "testUTF-16.dat" into a
     * heap-allocated, zero-terminated TCHAR buffer.
     *
     * File layout: an int holding the size of the text in bytes, followed
     * by the text itself. The file is opened in binary mode.
     *
     * Every failure is reported on stderr and ends the program after
     * releasing whatever was acquired so far.
     *
     * NOTE: "void main()" is kept from the original example; standard C
     * expects "int main(void)".
     */
    void main()
    {
    	FILE *pFile;
    	if (_tfopen_s(&pFile, _T("testUTF-16.dat"), _T("rb")) != 0) {
    		_tperror(_T("Open Error"));
    		return;
    	}

    	int textByteLength;
    	if (fread(&textByteLength, sizeof(textByteLength), 1, pFile) != 1) {
    		_tperror(_T("Length Read Error"));
    		fclose(pFile);
    		return;
    	}

    	/*
    	 * The length comes from the file, so it cannot be trusted. A negative
    	 * value would be converted to a huge unsigned number by the division
    	 * below and yield a meaningless buffer size.
    	 */
    	if (textByteLength < 0) {
    		_ftprintf_s(stderr, _T("Invalid Text Length: %d\n"), textByteLength);
    		fclose(pFile);
    		return;
    	}

    	/* Number of characters plus one for the terminating zero. */
    	int textLength = (int)((size_t)textByteLength / sizeof(TCHAR)) + 1;

    	/* calloc zero-fills, so the buffer is always terminated. */
    	TCHAR *szText = (TCHAR *)calloc((size_t)textLength, sizeof(TCHAR));
    	if (szText == NULL) {
    		_tperror(_T("Out of Memory Error"));
    		fclose(pFile);
    		return;
    	}

    	/*
    	 * _fgetts reads at most textLength - 1 characters and stops early
    	 * after a newline, so this assumes the stored text is a single line.
    	 */
    	if (_fgetts(szText, textLength, pFile) == NULL) {
    		_tperror(_T("Text Read Error"));
    		free(szText);
    		fclose(pFile);
    		return;
    	}

    	fclose(pFile);

    	// Do something with the Unicode string.

    	free(szText);
    }
    

    However, if your text is in UTF-8, the string IO functions will ignore any "ccs=UTF-8" in the fopen() mode string when you open the file as binary. To get your text as a TCHAR string (which maps to WCHAR in Unicode mode), you must use MultiByteToWideChar(). This example code shows how:

    Data includes the text "Some characters are in 日本語"
    00000000h: 20 00 00 00 53 6F 6D 65 20 63 68 61 72 61 63 74 ;  ...Some charact
    00000010h: 65 72 73 20 61 72 65 20 69 6E 20 E6 97 A5 E6 9C ; ers are in æ—¥æœ
    00000020h: AC E8 AA 9E                                     ; ¬èªž
    Code
    #include <Windows.h>
    #include <tchar.h>
    #include <stdio.h>
    #include <stdlib.h>	/* calloc, free */


    /*
     * Reads a length-prefixed UTF-8 string from "testUTF-8.dat" and converts
     * it to a heap-allocated, zero-terminated wide string with
     * MultiByteToWideChar().
     *
     * File layout: an int holding the size of the UTF-8 text in bytes,
     * followed by the text itself. The file is opened in binary mode.
     *
     * Every failure is reported on stderr and ends the program after
     * releasing whatever was acquired so far.
     *
     * NOTE: szText is declared as TCHAR but is filled by
     * MultiByteToWideChar(), so this example is meant for a Unicode build
     * where TCHAR is WCHAR. "void main()" is kept from the original example;
     * standard C expects "int main(void)".
     */
    void main()
    {
    	FILE *pFile;
    	if (_tfopen_s(&pFile, _T("testUTF-8.dat"), _T("rb")) != 0) {
    		_tperror(_T("Open Error"));
    		return;
    	}

    	int utf8TextLength;
    	if (fread(&utf8TextLength, sizeof(utf8TextLength), 1, pFile) != 1) {
    		_tperror(_T("Length Read Error"));
    		fclose(pFile);
    		return;
    	}

    	/* The length comes from the file; reject values that cannot be a size. */
    	if (utf8TextLength < 0) {
    		_ftprintf_s(stderr, _T("Invalid Text Length: %d\n"), utf8TextLength);
    		fclose(pFile);
    		return;
    	}
    	size_t utf8ByteCount = (size_t)utf8TextLength;

    	/* One extra zero-filled byte terminates the string for the -1 length calls below. */
    	char *szUTF8String = (char *)calloc(utf8ByteCount + 1, sizeof(char));
    	if (szUTF8String == NULL) {
    		_tperror(_T("Out of Memory Error"));
    		fclose(pFile);
    		return;
    	}

    	if (fread(szUTF8String, sizeof(char), utf8ByteCount, pFile) != utf8ByteCount) {
    		_tperror(_T("Text Read Error"));
    		free(szUTF8String);
    		fclose(pFile);
    		return;
    	}

    	fclose(pFile);

    	/* First call only measures: it returns the required size in characters, terminator included. */
    	int bufferLength = MultiByteToWideChar(CP_UTF8, 0, szUTF8String, -1, NULL, 0);
    	if (bufferLength == 0) {
    		_ftprintf_s(stderr, _T("UTF-8 to UTF-16 Conversion Error: %08xh\n"), GetLastError());
    		free(szUTF8String);
    		return;
    	}

    	TCHAR *szText = (TCHAR *)calloc((size_t)bufferLength, sizeof(TCHAR));
    	if (szText == NULL) {
    		_tperror(_T("Out of Memory Error"));
    		free(szUTF8String);
    		return;
    	}

    	/* MultiByteToWideChar returns 0 on failure and the number of characters written on success. */
    	if (MultiByteToWideChar(CP_UTF8, 0, szUTF8String, -1, szText, bufferLength) == 0) {
    		_ftprintf_s(stderr, _T("UTF-8 to UTF-16 Conversion Error: %08xh\n"), GetLastError());
    		free(szText);
    		free(szUTF8String);
    		return;
    	}

    	free(szUTF8String);

    	// Do something with the Unicode string.

    	free(szText);
    }
    
    

    If you are using some other codepage (like Shift-JIS), you do the same as for UTF-8 but pass the identifier for that codepage to the MultiByteToWideChar() function.

    I hope that helps.

すべての返信

  • 2009年11月7日 19:35Brian MuthMVPユーザーのメダルユーザーのメダルユーザーのメダルユーザーのメダルユーザーのメダル
     
    How are the characters encoded in the file? Does it depend on a codepage?
  • 2009年11月8日 2:20Llelan D. ユーザーのメダルユーザーのメダルユーザーのメダルユーザーのメダルユーザーのメダル
     回答済みコードあり
    Unfortunately, your explanation lacks detail and a compilable example, which makes it difficult to pin down exactly what you are trying to do. You do not specify whether the text in your file is encoded as UTF-8, UTF-16LE, UTF-16BE, or some other codepage.

    If it is in UTF-16LE, _fgetts() (which maps to fgetws() in Unicode mode) will read UTF-16 characters from a file opened in binary mode. The example shows how:

    Data includes the text "Some characters are in 日本語"
    00000000h: 34 00 00 00 53 00 6F 00 6D 00 65 00 20 00 63 00 ; 4...S.o.m.e. .c.
    00000010h: 68 00 61 00 72 00 61 00 63 00 74 00 65 00 72 00 ; h.a.r.a.c.t.e.r.
    00000020h: 73 00 20 00 61 00 72 00 65 00 20 00 69 00 6E 00 ; s. .a.r.e. .i.n.
    00000030h: 20 00 E5 65 2C 67 9E 8A                         ;  .åe,gžŠ
    Code
    #include <tchar.h>
    #include <stdio.h>
    #include <stdlib.h>	/* calloc, free */


    /*
     * Reads a length-prefixed string from "testUTF-16.dat" into a
     * heap-allocated, zero-terminated TCHAR buffer.
     *
     * File layout: an int holding the size of the text in bytes, followed
     * by the text itself. The file is opened in binary mode.
     *
     * Every failure is reported on stderr and ends the program after
     * releasing whatever was acquired so far.
     *
     * NOTE: "void main()" is kept from the original example; standard C
     * expects "int main(void)".
     */
    void main()
    {
    	FILE *pFile;
    	if (_tfopen_s(&pFile, _T("testUTF-16.dat"), _T("rb")) != 0) {
    		_tperror(_T("Open Error"));
    		return;
    	}

    	int textByteLength;
    	if (fread(&textByteLength, sizeof(textByteLength), 1, pFile) != 1) {
    		_tperror(_T("Length Read Error"));
    		fclose(pFile);
    		return;
    	}

    	/*
    	 * The length comes from the file, so it cannot be trusted. A negative
    	 * value would be converted to a huge unsigned number by the division
    	 * below and yield a meaningless buffer size.
    	 */
    	if (textByteLength < 0) {
    		_ftprintf_s(stderr, _T("Invalid Text Length: %d\n"), textByteLength);
    		fclose(pFile);
    		return;
    	}

    	/* Number of characters plus one for the terminating zero. */
    	int textLength = (int)((size_t)textByteLength / sizeof(TCHAR)) + 1;

    	/* calloc zero-fills, so the buffer is always terminated. */
    	TCHAR *szText = (TCHAR *)calloc((size_t)textLength, sizeof(TCHAR));
    	if (szText == NULL) {
    		_tperror(_T("Out of Memory Error"));
    		fclose(pFile);
    		return;
    	}

    	/*
    	 * _fgetts reads at most textLength - 1 characters and stops early
    	 * after a newline, so this assumes the stored text is a single line.
    	 */
    	if (_fgetts(szText, textLength, pFile) == NULL) {
    		_tperror(_T("Text Read Error"));
    		free(szText);
    		fclose(pFile);
    		return;
    	}

    	fclose(pFile);

    	// Do something with the Unicode string.

    	free(szText);
    }
    

    However, if your text is in UTF-8, the string IO functions will ignore any "ccs=UTF-8" in the fopen() mode string when you open the file as binary. To get your text as a TCHAR string (which maps to WCHAR in Unicode mode), you must use MultiByteToWideChar(). This example code shows how:

    Data includes the text "Some characters are in 日本語"
    00000000h: 20 00 00 00 53 6F 6D 65 20 63 68 61 72 61 63 74 ;  ...Some charact
    00000010h: 65 72 73 20 61 72 65 20 69 6E 20 E6 97 A5 E6 9C ; ers are in æ—¥æœ
    00000020h: AC E8 AA 9E                                     ; ¬èªž
    Code
    #include <Windows.h>
    #include <tchar.h>
    #include <stdio.h>
    #include <stdlib.h>	/* calloc, free */


    /*
     * Reads a length-prefixed UTF-8 string from "testUTF-8.dat" and converts
     * it to a heap-allocated, zero-terminated wide string with
     * MultiByteToWideChar().
     *
     * File layout: an int holding the size of the UTF-8 text in bytes,
     * followed by the text itself. The file is opened in binary mode.
     *
     * Every failure is reported on stderr and ends the program after
     * releasing whatever was acquired so far.
     *
     * NOTE: szText is declared as TCHAR but is filled by
     * MultiByteToWideChar(), so this example is meant for a Unicode build
     * where TCHAR is WCHAR. "void main()" is kept from the original example;
     * standard C expects "int main(void)".
     */
    void main()
    {
    	FILE *pFile;
    	if (_tfopen_s(&pFile, _T("testUTF-8.dat"), _T("rb")) != 0) {
    		_tperror(_T("Open Error"));
    		return;
    	}

    	int utf8TextLength;
    	if (fread(&utf8TextLength, sizeof(utf8TextLength), 1, pFile) != 1) {
    		_tperror(_T("Length Read Error"));
    		fclose(pFile);
    		return;
    	}

    	/* The length comes from the file; reject values that cannot be a size. */
    	if (utf8TextLength < 0) {
    		_ftprintf_s(stderr, _T("Invalid Text Length: %d\n"), utf8TextLength);
    		fclose(pFile);
    		return;
    	}
    	size_t utf8ByteCount = (size_t)utf8TextLength;

    	/* One extra zero-filled byte terminates the string for the -1 length calls below. */
    	char *szUTF8String = (char *)calloc(utf8ByteCount + 1, sizeof(char));
    	if (szUTF8String == NULL) {
    		_tperror(_T("Out of Memory Error"));
    		fclose(pFile);
    		return;
    	}

    	if (fread(szUTF8String, sizeof(char), utf8ByteCount, pFile) != utf8ByteCount) {
    		_tperror(_T("Text Read Error"));
    		free(szUTF8String);
    		fclose(pFile);
    		return;
    	}

    	fclose(pFile);

    	/* First call only measures: it returns the required size in characters, terminator included. */
    	int bufferLength = MultiByteToWideChar(CP_UTF8, 0, szUTF8String, -1, NULL, 0);
    	if (bufferLength == 0) {
    		_ftprintf_s(stderr, _T("UTF-8 to UTF-16 Conversion Error: %08xh\n"), GetLastError());
    		free(szUTF8String);
    		return;
    	}

    	TCHAR *szText = (TCHAR *)calloc((size_t)bufferLength, sizeof(TCHAR));
    	if (szText == NULL) {
    		_tperror(_T("Out of Memory Error"));
    		free(szUTF8String);
    		return;
    	}

    	/* MultiByteToWideChar returns 0 on failure and the number of characters written on success. */
    	if (MultiByteToWideChar(CP_UTF8, 0, szUTF8String, -1, szText, bufferLength) == 0) {
    		_ftprintf_s(stderr, _T("UTF-8 to UTF-16 Conversion Error: %08xh\n"), GetLastError());
    		free(szText);
    		free(szUTF8String);
    		return;
    	}

    	free(szUTF8String);

    	// Do something with the Unicode string.

    	free(szText);
    }
    
    

    If you are using some other codepage (like Shift-JIS), you do the same as for UTF-8 but pass the identifier for that codepage to the MultiByteToWideChar() function.

    I hope that helps.