locked
Unicode related problem in Chinese OS and English OS RRS feed

  • Question

  • Hi all,

    I am currently facing a problem. My application is able to download data from an HTTPS server and save it into a file, and the Unicode data is shown correctly when I open the file (the data is readable Chinese characters).

    But when my application runs on a Chinese OS, it is not able to save the Unicode data correctly in the file (some of the Chinese characters become unknown symbols).

    Below is my code:

    class CUTF8ToWString
    {
    public:
        CUTF8ToWString()
        {
            m_pWString = NULL;
        }
        CUTF8ToWString(const char* str)
        {
            m_pWString = NULL;
            _alloc(str);
        }
        ~CUTF8ToWString()
        {
            _free();
        }

    public:
        const wchar_t* Data()
        {
            return m_pWString;
        }
        size_t Length()
        {
            return m_pWString ? wcslen(m_pWString) : 0;
        }
        CUTF8ToWString& operator=(const char* str)
        {
            _alloc(str);
            return *this;
        }
        operator const wchar_t*() const
        {
            return m_pWString;
        }
        operator wchar_t*() const
        {
            return m_pWString;

        }
    private:
        wchar_t*    m_pWString;
        void _free()
        {
            if (m_pWString)
            {
                free(m_pWString);
                m_pWString = NULL;
            }
        }
        void _alloc(const char* str)
        {
            _free();
            if (str)
            {
                size_t size = strlen(str);
                int length = MultiByteToWideChar(CP_UTF8, 0, str, size, NULL, 0);

                m_pWString = (wchar_t*)malloc((length+1)*sizeof(wchar_t));
                ZeroMemory(m_pWString, (length+1)*sizeof(wchar_t));

                MultiByteToWideChar(CP_UTF8, 0, str, size, m_pWString, length);
            }
            else
            {
                m_pWString = NULL;
            }
        }

    };

    // Buffer that the download function fills with the response data.
    TCHAR szResponse[200000] = {0};
    // Will point at the narrow (char) version of the response.
    char *chOutput = NULL;

    /* I have a function that downloads the data over HTTPS and stores the raw data in szResponse. */

    // Required before the ATL conversion macros (W2A below) can be used in this scope.
    USES_CONVERSION;
    // W2A converts the wide-character response to an ANSI (char) string.
    chOutput = W2A(szResponse);
    // Treats those char bytes as UTF-8 and converts them back to wide characters.
    CUTF8ToWString strConvert(chOutput);
    // Builds a CString from the converted text via one of CUTF8ToWString's
    // wchar_t* conversion operators.
    CString    strOutput(strConvert);

    /* save into file */
    Thursday, April 9, 2009 10:48 AM

Answers

  • Your problems start with szResponse.  You are not getting a string from the server, you are getting bytes.  Change the declaration from TCHAR[] to unsigned char[].  Now you can convert from UTF8 bytes to a string, use MultiByteToWideChar, using code page CP_UTF8.  Now you got a wchar_t[].  Stop there, you've got your Chinese characters in UTF16 encoding, just like the operating system likes them.


    Hans Passant.
    • Marked as answer by guazwk Friday, April 10, 2009 4:21 AM
    Friday, April 10, 2009 1:46 AM
    Moderator

All replies

  • You are converting Unicode to Ansi (W2A), then back again to Unicode (assuming UTF8 encoding), then back again to CString.  The result is bound to look like Chinese, just not somebody Chinese could decipher.  Start by retrieving the encoding used by the web server, it is present in the HTTP header.
    Hans Passant.
    Thursday, April 9, 2009 1:43 PM
    Moderator
  • Hi nobugz,

    I know that the data I receive from the HTTPS server is in UTF-8. (I opened the HTTPS link in IE and changed the encoding to UTF-8; then I could read the Chinese characters, which is why I say the data is in UTF-8.)

    But I am confused about how to properly convert the data into the correct type.
    What is the best practice for doing it?
    Is the conversion class I used correct?
     
    I face the problem when I use my application under a Chinese OS, so I think the way I do the conversion is not correct. I tried to Google it, but so far I haven't found any useful information.

    thank you,
    Guaz
    Friday, April 10, 2009 1:31 AM
  • Your problems start with szResponse.  You are not getting a string from the server, you are getting bytes.  Change the declaration from TCHAR[] to unsigned char[].  Now you can convert from UTF8 bytes to a string, use MultiByteToWideChar, using code page CP_UTF8.  Now you got a wchar_t[].  Stop there, you've got your Chinese characters in UTF16 encoding, just like the operating system likes them.


    Hans Passant.
    • Marked as answer by guazwk Friday, April 10, 2009 4:21 AM
    Friday, April 10, 2009 1:46 AM
    Moderator
  • int
    CAsyncSslConnection::SendRquest(
    	LPCTSTR lpszRquestContext,
    	LPTSTR lpReponseResult,
    	UINT unLength
    	)
    {
    	// Sends one request on m_hRequest and, if that succeeds, fetches the
    	// response through GetRquestResult().
    	//
    	//   lpszRquestContext  request body; passed through CWStringToUTF8 before
    	//                      sending (presumably a wide-to-UTF-8 conversion; the
    	//                      class is not shown here)
    	//   lpReponseResult    caller buffer forwarded to GetRquestResult()
    	//   unLength           length value forwarded to GetRquestResult()
    	//
    	// Returns 0 on success, 1 on failure and 2 when m_hStopEvent was
    	// signalled; once the request has been sent the value comes from
    	// GetRquestResult(). m_hStoppedEvent is reset on entry and set on exit.

    	// 0 is the success code, so a missing handle must not return FALSE (== 0).
    	if( m_hRequest==NULL )
    		return 1;

    	int  nReturn = 1;
    	CWStringToUTF8 strConvert(lpszRquestContext);
    	INTERNET_BUFFERS internetBuff = {0};
    	internetBuff.dwStructSize = sizeof(INTERNET_BUFFERS);
    	internetBuff.lpcszHeader = m_strHeader.c_str();
    	internetBuff.dwHeadersLength = static_cast<DWORD>(m_strHeader.length());
    	internetBuff.dwHeadersTotal = internetBuff.dwHeadersLength;
    	internetBuff.lpvBuffer = (LPVOID)strConvert.Data();
    	internetBuff.dwBufferLength = static_cast<DWORD>(strConvert.Length());
    	internetBuff.dwBufferTotal = internetBuff.dwBufferLength;

    	ResetEvent(m_hStoppedEvent);
    	DebugLog(0,_T("Send one new rquest...\r\n"));

    	// --- Phase 1: start the request -------------------------------------
    	if( ::WaitForSingleObject(m_hStopEvent,10)==WAIT_OBJECT_0 ){
    		// Stop was requested before anything was sent.
    		nReturn = 2;
    	}else{
    		DebugLog(0,_T("HttpSendRequestEx...\r\n"));
    		{
    			// Add SECURITY_FLAG_IGNORE_UNKNOWN_CA to the current security flags.
    			// NOTE(review): this flag makes WinINet accept certificates from
    			// unknown certificate authorities, which weakens HTTPS -- confirm
    			// it is intended outside of testing.
    			DWORD dwFlags = 0;
    			DWORD dwBuffLen = sizeof(dwFlags);

    			// Only write the flags back when the current value could be read;
    			// otherwise unrelated flags would be overwritten.
    			if( InternetQueryOption (m_hRequest, INTERNET_OPTION_SECURITY_FLAGS,
    				(LPVOID)&dwFlags, &dwBuffLen) )
    			{
    				dwFlags |= SECURITY_FLAG_IGNORE_UNKNOWN_CA;
    				InternetSetOption (m_hRequest, INTERNET_OPTION_SECURITY_FLAGS,
    					&dwFlags, sizeof (dwFlags) );
    			}
    		}
    		// The context parameter is a DWORD_PTR; casting the pointer to DWORD
    		// would truncate it in a 64-bit build.
    		if( HttpSendRequestEx(
    			m_hRequest,
    			&internetBuff,
    			NULL,
    			0,
    			reinterpret_cast<DWORD_PTR>(&m_requestContext)) )
    		{
    			// Completed synchronously.
    			nReturn = 0;
    		}
    		else if( ::GetLastError()==ERROR_IO_PENDING )
    		{
    			// Asynchronous completion: wait for the completion event or a stop request.
    			HANDLE hEvents[] = {m_hRequestCompleteEvent,m_hStopEvent};
    			const DWORD dwWaitResult = ::WaitForMultipleObjects(2,hEvents,FALSE,INFINITE);
    			if( dwWaitResult==WAIT_OBJECT_0+1 ){
    				nReturn = 2;
    			}else if( dwWaitResult==WAIT_OBJECT_0 && m_dwLastError==ERROR_SUCCESS ){
    				nReturn = 0;
    			}
    			// Any other wait result, or a non-zero m_dwLastError, leaves nReturn at 1.
    		}
    		// Any other error is a real failure: nReturn stays 1.
    	}

    	// --- Phase 2: end the request ---------------------------------------
    	// Skipped only when a stop was requested. Failures of HttpEndRequest
    	// itself are deliberately not treated as fatal here; only a stop
    	// request during the wait changes the result.
    	if( nReturn!=2 ){
    		DebugLog(0,_T("HttpEndRequest...\r\n"));
    		if( !HttpEndRequest(m_hRequest, NULL, HSR_INITIATE,
    				reinterpret_cast<DWORD_PTR>(&m_requestContext))
    			&& ::GetLastError()==ERROR_IO_PENDING )
    		{
    			HANDLE hEvents[] = {m_hRequestCompleteEvent,m_hStopEvent};
    			if( ::WaitForMultipleObjects(2,hEvents,FALSE,INFINITE)==WAIT_OBJECT_0+1 ){
    				nReturn = 2;
    			}
    		}
    	}

    	// --- Phase 3: read the response -------------------------------------
    	if( nReturn==0 ){
    		nReturn = GetRquestResult(lpReponseResult,unLength);
    	}
    	SetEvent(m_hStoppedEvent);
    	return nReturn;
    }
    
    // Reads the response of the request on m_hRequest into lpResult.
    //
    //   lpResult  caller buffer that receives the NUL-terminated response text
    //   unLength  capacity of lpResult in TCHARs, including the terminator;
    //             longer responses are truncated to fit
    //
    // Returns 0 on success, 1 on failure (including a NULL handle, a NULL
    // buffer or a zero capacity) and 2 when m_hStopEvent was signalled.
    int
    CAsyncSslConnection::GetRquestResult(
    		LPTSTR lpResult,
    		UINT unLength
    	)
    {
    	// 0 is the success code, so invalid state must not return FALSE (== 0).
    	if( m_hRequest==NULL || lpResult==NULL || unLength==0 )
    		return 1;

    	int  nReturn = 1;
    	DWORD dwLastError = 0;
    	char* lpszHeaderData = NULL;
    	char* lpszContentData = NULL;
    	DWORD dwWaitResult = 0;
    	DWORD dwSize = 0;

    	// --- Phase 1: query the raw response headers ------------------------
    	// The header text itself is discarded afterwards. Only a stop request
    	// seen here prevents the body from being read below.
    	do
    	{
    		dwWaitResult = WaitForSingleObject(m_hStopEvent,10);
    		if( dwWaitResult==WAIT_OBJECT_0 ){
    			nReturn = 2;
    			break;
    		}
    		BOOL bRet = HttpQueryInfo(
    			m_hRequest,
    			HTTP_QUERY_RAW_HEADERS_CRLF,
    			lpszHeaderData,
    			&dwSize,
    			NULL);
    		if( !bRet ){
    			dwLastError = ::GetLastError();
    			if( dwLastError==ERROR_INSUFFICIENT_BUFFER ){
    				// dwSize now holds the required size: allocate and retry.
    				delete [] lpszHeaderData;
    				lpszHeaderData = new char[dwSize+1];
    				memset(lpszHeaderData,0,dwSize+1);
    				continue;
    			}else if( dwLastError==ERROR_IO_PENDING ){
    				HANDLE hEvents[] = {m_hRequestCompleteEvent,m_hStopEvent};
    				dwWaitResult = ::WaitForMultipleObjects(2,hEvents,FALSE,INFINITE);
    				if( dwWaitResult==WAIT_OBJECT_0+1 ){
    					nReturn = 2;
    				}
    			}
    			break;
    		}
    		nReturn = 0;
    		break;
    	}while(1);
    	delete [] lpszHeaderData;
    	lpszHeaderData = NULL;

    	// --- Phase 2: read the body chunk by chunk --------------------------
    	// NOTE(review): appending a char* to _bstr_t converts the bytes with the
    	// system ANSI code page, so a UTF-8 body is presumably mis-decoded on
    	// systems whose ANSI code page is multi-byte. Left unchanged because
    	// existing callers may compensate for it -- confirm before decoding
    	// here with CP_UTF8 instead.
    	_bstr_t strReceivedString=_T("");
    	// A stop request seen during the header phase must not be overwritten
    	// by a later success, so the loop is skipped in that case.
    	while( nReturn!=2 ){
    		dwSize = 0;
    		DebugLog(0,_T("InternetQueryDataAvailable function call...\r\n"));
    		// The context parameter is a DWORD_PTR; casting the pointer to DWORD
    		// would truncate it in a 64-bit build.
    		if (!InternetQueryDataAvailable(m_hRequest,&dwSize,0,
    				reinterpret_cast<DWORD_PTR>(&m_requestContext))){
    			dwLastError = ::GetLastError();
    			if( dwLastError==ERROR_IO_PENDING ){
    				HANDLE hEvents[] = {m_hRequestCompleteEvent,m_hStopEvent};
    				dwWaitResult = ::WaitForMultipleObjects(2,hEvents,FALSE,INFINITE);
    				if( dwWaitResult==WAIT_OBJECT_0+1 ){
    					nReturn = 2;
    					break;
    				}else if( dwWaitResult!= WAIT_OBJECT_0 ){
    					break;
    				}
    				if( m_dwLastError!=ERROR_SUCCESS ){
    					if( WaitForSingleObject(m_hStopEvent,100)==WAIT_OBJECT_0 ){
    						nReturn = 2;
    						break;
    					}
    					continue;
    				}
    			}
    			// NOTE(review): a pending query that completed successfully also
    			// ends the read here, with nReturn left at its header-phase value.
    			// Verify against the status callback whether it should retry.
    			break;
    		}
    		if( dwSize>0 ){
    			// One extra zeroed byte keeps the chunk NUL-terminated for the append below.
    			lpszContentData = new char[dwSize+1];
    			memset(lpszContentData,0,dwSize+1);
    			DWORD dwDownloaded = 0;
    			DebugLog(0,_T("InternetReadFile function call...\r\n"));
    			if(!InternetReadFile(m_hRequest,(LPVOID)lpszContentData,dwSize,&dwDownloaded))
    			{
    				dwLastError = ::GetLastError();
    				if( dwLastError==ERROR_IO_PENDING ){
    					HANDLE hEvents[] = {m_hRequestCompleteEvent,m_hStopEvent};
    					dwWaitResult = ::WaitForMultipleObjects(2,hEvents,FALSE,INFINITE);
    					if( dwWaitResult==WAIT_OBJECT_0+1 ){
    						nReturn = 2;
    						break;
    					}else if( dwWaitResult!= WAIT_OBJECT_0 ){
    						break;
    					}
    					if( m_dwLastError!=ERROR_SUCCESS ){
    						if( WaitForSingleObject(m_hStopEvent,100)==WAIT_OBJECT_0 ){
    							nReturn = 2;
    							break;
    						}
    						// Read timed out: drop this chunk buffer and query again.
    						delete [] lpszContentData;
    						lpszContentData = NULL;
    						continue;
    					}
    				}
    				// NOTE(review): as above, a pending read that completed
    				// successfully is not appended -- verify.
    				break;
    			}

    			strReceivedString += lpszContentData;
    			delete [] lpszContentData;
    			lpszContentData = NULL;
    			if( dwDownloaded==0 ){
    				nReturn = 0;
    				break;
    			}
    		}else{
    			// No more data available: the body is complete.
    			nReturn = 0;
    			break;
    		}
    	}
    	// Frees the chunk buffer left over when the loop was left via break.
    	delete [] lpszContentData;
    	lpszContentData = NULL;

    	// --- Phase 3: copy the text to the caller ---------------------------
    	if( nReturn==0 ){
    		UINT unReturnLength = 0;
    		if( strReceivedString.length() ){
    			unReturnLength = static_cast<UINT>(_tcslen(strReceivedString));
    		}
    		// Never write past the caller's buffer: keep one slot for the terminator.
    		UINT nCopy = unReturnLength;
    		if( nCopy>unLength-1 ){
    			nCopy = unLength-1;
    		}
    		if( nCopy>0 ){
    			_tcsncpy(lpResult,strReceivedString,nCopy);
    		}
    		lpResult[nCopy]=0;
    	}
    	return nReturn;
    }
    
    // Buffer that receives the response text.
    TCHAR szResponse[200000] = {0};
    // Not used in the lines shown here.
    char *chOutput = NULL;
    // NOTE(review): SendRequest is not shown in this post; presumably it forwards
    // to SendRquest/GetRquestResult and fills szResponse -- confirm.
    m_pHTTPSConnection->SendRequest(szResponse);

    So does that mean I need to change szResponse to unsigned char? Or is my HTTPS function already wrong?

    Thanks for your help in advance :)
    Friday, April 10, 2009 2:50 AM
  • I was able to fix my problem by modifying the HTTPS class.

    I just changed _bstr_t strReceivedString=_T(""); to CByteArray byteReceivedString;
    Friday, April 10, 2009 4:21 AM