locked
How can I encode each Hebrew string? RSS feed

  • Question

  • At the end of the loop I get a List, and each element in it looks like this:

    ������� ������� ����� �����

    This happens because the text is in Hebrew.
    The encoding code page I am using is 65001 (UTF-8):

    System.Text.Encoding.GetEncoding(65001)

    This is the original method:

    /// <summary>
    /// Collects every responder name in <paramref name="contents"/>: the text that sits
    /// between each <c>&lt;FONT CLASS='text16b'&gt;</c> tag and the next <c>&amp;n</c>.
    /// </summary>
    /// <param name="contents">HTML text to scan.</param>
    /// <returns>The extracted names in document order; an empty list when none are found.</returns>
    public List<string> GetResponsers(string contents)
    {
        const string firstTag = "<FONT CLASS='text16b'>";
        const string lastTag = "&n";

        List<string> threadList = new List<string>();
        if (string.IsNullOrEmpty(contents))
        {
            return threadList;
        }

        int startPos = 0;
        while (true)
        {
            // Ordinal search: these are markup tokens, not linguistic text.
            int f = contents.IndexOf(firstTag, startPos, System.StringComparison.Ordinal);
            if (f == -1)
            {
                break;
            }

            // Skip the whole opening tag so it does not leak into the extracted name.
            int nameStart = f + firstTag.Length;
            int g = contents.IndexOf(lastTag, nameStart, System.StringComparison.Ordinal);
            if (g == -1)
            {
                // Opening tag without a terminator: nothing more can be extracted.
                break;
            }

            threadList.Add(contents.Substring(nameStart, g - nameStart));
            startPos = g + lastTag.Length;
        }
        return threadList;
    }

    And this is the method after I added the encoding part, but it didn't change anything:

    /// <summary>
    /// Collects every responder name in <paramref name="contents"/>: the text that sits
    /// between each <c>&lt;FONT CLASS='text16b'&gt;</c> tag and the next <c>&amp;n</c>,
    /// passing each name through a UTF-8 encode/decode round trip.
    /// </summary>
    /// <param name="contents">HTML text to scan.</param>
    /// <returns>The extracted names in document order; an empty list when none are found.</returns>
    public List<string> GetResponsers(string contents)
    {
        const string firstTag = "<FONT CLASS='text16b'>";
        const string lastTag = "&n";

        List<string> threadList = new List<string>();
        if (string.IsNullOrEmpty(contents))
        {
            return threadList;
        }

        // Code page 65001 is UTF-8.
        Encoding utf8 = Encoding.GetEncoding(65001);

        int startPos = 0;
        while (true)
        {
            // Ordinal search: these are markup tokens, not linguistic text.
            int f = contents.IndexOf(firstTag, startPos, System.StringComparison.Ordinal);
            if (f == -1)
            {
                break;
            }

            // Skip the whole opening tag so it does not leak into the extracted name.
            int nameStart = f + firstTag.Length;
            int g = contents.IndexOf(lastTag, nameStart, System.StringComparison.Ordinal);
            if (g == -1)
            {
                // Opening tag without a terminator: nothing more can be extracted.
                break;
            }

            string responser = contents.Substring(nameStart, g - nameStart);

            // NOTE: encoding a string to bytes and decoding those bytes with the same
            // encoding gives back the same text. It cannot repair characters that were
            // already decoded with the wrong code page before reaching this method.
            byte[] utf8Bytes = utf8.GetBytes(responser);
            responser = utf8.GetString(utf8Bytes);

            threadList.Add(responser);
            startPos = g + lastTag.Length;
        }
        return threadList;
    }


    How do I apply the encoding to each responser?

    Sunday, July 27, 2014 1:33 PM

Answers

  • I solved it this way:

    /// <summary>
    /// Downloads the page at <paramref name="filename"/>, decoding it as Windows-1255
    /// (Hebrew), and hands the text to <c>GetResponsers</c>.
    /// </summary>
    /// <param name="filename">Address of the page to download.</param>
    /// <returns>The value <c>GetResponsers</c> produces for the downloaded text.</returns>
    public string GetResponsersFN(string filename)
    {
        string str;
        using (WebClient client = new WebClient())
        {
            // Tell WebClient which code page to decode the response with. This is the
            // documented switch for DownloadString; a "charset=..." Content-Type header
            // on the outgoing GET request is malformed and not meant for this purpose.
            client.Encoding = System.Text.Encoding.GetEncoding("windows-1255");
            str = client.DownloadString(filename);
        }
        return GetResponsers(str);
    }

    The original method is then left mostly unchanged:

    /// <summary>
    /// Scans <paramref name="contents"/> for responder names, i.e. the text between each
    /// <c>&lt;FONT CLASS='text16b'&gt;</c> tag and the next <c>&amp;n</c>, and returns the
    /// last one found.
    /// </summary>
    /// <param name="contents">HTML text to scan.</param>
    /// <returns>The last extracted name, or an empty string when none is found.</returns>
    public string GetResponsers(string contents)
    {
        const string firstTag = "<FONT CLASS='text16b'>";
        const string lastTag = "&n";

        string responser = "";
        if (string.IsNullOrEmpty(contents))
        {
            return responser;
        }

        int startPos = 0;
        while (true)
        {
            // Ordinal search: these are markup tokens, not linguistic text.
            int f = contents.IndexOf(firstTag, startPos, System.StringComparison.Ordinal);
            if (f == -1)
            {
                break;
            }

            // Derive the offset from the tag itself instead of a hard-coded 22.
            int nameStart = f + firstTag.Length;
            int g = contents.IndexOf(lastTag, nameStart, System.StringComparison.Ordinal);
            if (g == -1)
            {
                // Opening tag without a terminator: keep the last complete match.
                break;
            }

            // Only the most recent match is kept because the method returns one string.
            responser = contents.Substring(nameStart, g - nameStart);
            startPos = g + lastTag.Length;
        }
        return responser;
    }

    It works without problems.

    • Marked as answer by Chocolade1972 Sunday, July 27, 2014 3:58 PM
    Sunday, July 27, 2014 3:56 PM

All replies

  • Step through the original code and the new code and make sure all the intermediate values are the same.  Since you are encoding, some of the string constants you are using may have to be changed.  For example, the line below:

    string lastTag = "&n";

    "&n" may be different after you use encoding 65001.


    jdweng

    Sunday, July 27, 2014 3:40 PM
  • I solved it this way:

    /// <summary>
    /// Downloads the page at <paramref name="filename"/>, decoding it as Windows-1255
    /// (Hebrew), and hands the text to <c>GetResponsers</c>.
    /// </summary>
    /// <param name="filename">Address of the page to download.</param>
    /// <returns>The value <c>GetResponsers</c> produces for the downloaded text.</returns>
    public string GetResponsersFN(string filename)
    {
        string str;
        using (WebClient client = new WebClient())
        {
            // Tell WebClient which code page to decode the response with. This is the
            // documented switch for DownloadString; a "charset=..." Content-Type header
            // on the outgoing GET request is malformed and not meant for this purpose.
            client.Encoding = System.Text.Encoding.GetEncoding("windows-1255");
            str = client.DownloadString(filename);
        }
        return GetResponsers(str);
    }

    The original method is then left mostly unchanged:

    /// <summary>
    /// Scans <paramref name="contents"/> for responder names, i.e. the text between each
    /// <c>&lt;FONT CLASS='text16b'&gt;</c> tag and the next <c>&amp;n</c>, and returns the
    /// last one found.
    /// </summary>
    /// <param name="contents">HTML text to scan.</param>
    /// <returns>The last extracted name, or an empty string when none is found.</returns>
    public string GetResponsers(string contents)
    {
        const string firstTag = "<FONT CLASS='text16b'>";
        const string lastTag = "&n";

        string responser = "";
        if (string.IsNullOrEmpty(contents))
        {
            return responser;
        }

        int startPos = 0;
        while (true)
        {
            // Ordinal search: these are markup tokens, not linguistic text.
            int f = contents.IndexOf(firstTag, startPos, System.StringComparison.Ordinal);
            if (f == -1)
            {
                break;
            }

            // Derive the offset from the tag itself instead of a hard-coded 22.
            int nameStart = f + firstTag.Length;
            int g = contents.IndexOf(lastTag, nameStart, System.StringComparison.Ordinal);
            if (g == -1)
            {
                // Opening tag without a terminator: keep the last complete match.
                break;
            }

            // Only the most recent match is kept because the method returns one string.
            responser = contents.Substring(nameStart, g - nameStart);
            startPos = g + lastTag.Length;
        }
        return responser;
    }

    It works without problems.

    • Marked as answer by Chocolade1972 Sunday, July 27, 2014 3:58 PM
    Sunday, July 27, 2014 3:56 PM