none
关于C#的WebRequest的乱码问题 RSS feed

  • 问题

  •         /// <summary>
            /// Requests <paramref name="uri"/> and returns the response body as text.
            /// </summary>
            /// <param name="uri">Address to request.</param>
            /// <param name="method">Request method, e.g. "GET".</param>
            /// <param name="encode">Not used by this method: the body is always decoded with the
            /// StreamReader default (UTF-8), so pages served in GBK/GB2312 come back garbled.</param>
            /// <returns>The response text, or "获取失败!" if any exception is thrown.</returns>
            public string gethttpcode(string uri,string method,string encode)
            {
                try
                {
                    WebRequest req = WebRequest.Create(uri);
                    req.Method = method;
                    req.Timeout = 10000;

                    // Dispose the response, its stream and the reader deterministically so the
                    // underlying connection is released even when reading fails part-way.
                    using (WebResponse res = req.GetResponse())
                    using (Stream receiveStream = res.GetResponseStream())
                    using (StreamReader sr = new StreamReader(receiveStream))
                    {
                        return sr.ReadToEnd();
                    }
                }
                catch
                {
                    // Best-effort contract: every failure is reported through the return value.
                    return "获取失败!";
                }
            }


    如上函数,获取UTF8的网页正常,获取GBK(或GB2312)就出现乱码。用Encoding将gb2312的网页转成了UTF8后乱码都变成了问号,也就是还是乱码,怎么解决?
    2009年11月1日 13:05

答案

  • 你需要先得到网页编码。下面这段代码可以解决大部分的网页
      // Demo click handler: downloads each sample address through GetWebPage, puts the
      // returned text into textBox1, then shows the address in a message box before moving on.
      private void button3_Click(object sender, EventArgs e)
        {
          // Sample addresses used to exercise GetWebPage.
          string[] samplePages = new string[]
          {
            "http://www.kbs.co.kr/",
            "http://rosemary.kbs.co.kr/",
            "http://sbcx.saic.gov.cn/trade/index.jsp",
            "http://www.csdn.net",
            "http://www.google.cn/",
            "http://www.baidu.com",
            "http://www.javaeye.com/",
            "http://blog.163.com/kel_scott66/blog/static/1150539632009614115635700/",
            "http://www.sina.com.hk/",
            "http://www.rthk.org.hk/"
          };

          for (int index = 0; index < samplePages.Length; index++)
          {
            string address = samplePages[index];
            string pageText = GetWebPage(address, "GET");
            textBox1.Text = pageText;
            MessageBox.Show(address);
          }
        }
    
        /// <summary>
        /// Downloads <paramref name="uri"/> once and decodes the body with the character set
        /// the page declares, so pages that are not UTF-8 (GBK, Big5, ...) are not garbled.
        /// </summary>
        /// <param name="uri">Absolute HTTP address to request.</param>
        /// <param name="method">HTTP method, e.g. "GET".</param>
        /// <returns>The decoded body, or "获取失败!" if any exception is thrown.</returns>
        public string GetWebPage(string uri, string method)
        {
          try
          {
            HttpWebRequest req = (HttpWebRequest)WebRequest.Create(uri);
            req.Method = method;
            req.Timeout = 10000;
            req.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.2; zh-CN; rv:1.9.1.4) Gecko/20091016 Firefox/3.5.4 (.NET CLR 3.5.30729)";

            byte[] body;
            string contentType;
            string characterSet;

            // Buffer the raw bytes so the page is requested only once; the same bytes are
            // decoded a second time after the charset is known.
            using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
            using (Stream receiveStream = res.GetResponseStream())
            {
              contentType = res.ContentType;
              characterSet = res.CharacterSet;
              body = ReadToEndBytes(receiveStream);
            }

            // Charset declarations are plain ASCII, so a provisional UTF-8 decode is enough
            // to find them even when the rest of the text is in another encoding.
            string sniffedText = DecodeBytes(body, Encoding.UTF8);
            Encoding htmlEncoding = ChooseEncoding(sniffedText, contentType, characterSet);
            return DecodeBytes(body, htmlEncoding);
          }
          catch
          {
            // Best-effort contract: every failure is reported through the return value.
            return "获取失败!";
          }
        }

        // Copies the remainder of a stream into a byte array.
        private static byte[] ReadToEndBytes(Stream input)
        {
          using (MemoryStream buffer = new MemoryStream())
          {
            byte[] chunk = new byte[8192];
            int read;
            while ((read = input.Read(chunk, 0, chunk.Length)) > 0)
            {
              buffer.Write(chunk, 0, read);
            }
            return buffer.ToArray();
          }
        }

        // Decodes bytes through a StreamReader so a leading byte-order mark is honoured and stripped.
        private static string DecodeBytes(byte[] bytes, Encoding encoding)
        {
          using (MemoryStream source = new MemoryStream(bytes))
          using (StreamReader reader = new StreamReader(source, encoding))
          {
            return reader.ReadToEnd();
          }
        }

        // Picks the first usable charset: in-document declaration, then the Content-Type
        // header, then HttpWebResponse.CharacterSet; falls back to Encoding.Default.
        // The Content-Encoding header is deliberately not consulted: it names a compression
        // scheme (gzip, deflate), not a character set.
        private static Encoding ChooseEncoding(string html, string contentType, string characterSet)
        {
          string[] candidates = new string[]
          {
            ExtractCharset(html),
            ExtractCharset(contentType),
            characterSet
          };

          foreach (string name in candidates)
          {
            Encoding resolved = TryGetEncoding(name);
            if (resolved != null)
            {
              return resolved;
            }
          }
          return Encoding.Default;
        }

        // Returns the value of the first "charset=" declaration in the text, or "" if none.
        // Accepts quoted, unquoted and HTML5 (<meta charset="...">) forms, case-insensitively.
        private static string ExtractCharset(string text)
        {
          if (string.IsNullOrEmpty(text))
          {
            return "";
          }
          Match declared = Regex.Match(text, @"charset\s*=\s*[""']?\s*(?<charset>[A-Za-z0-9_\-:.]+)", RegexOptions.IgnoreCase);
          return declared.Success ? declared.Groups["charset"].Value : "";
        }

        // Maps a charset name to an Encoding; returns null for empty or unrecognised names
        // so the caller can try the next candidate instead of failing the whole fetch.
        private static Encoding TryGetEncoding(string name)
        {
          if (string.IsNullOrEmpty(name))
          {
            return null;
          }
          try
          {
            return Encoding.GetEncoding(name.Trim());
          }
          catch (ArgumentException)
          {
            return null;
          }
          catch (NotSupportedException)
          {
            return null;
          }
        }


    2009年11月5日 12:37
    版主

全部回复

  • 先从响应(res)中解析出网页的编码,再按该编码读取内容。
    【孟子E章】
    2009年11月2日 1:10
    版主
  • 版主您好,能说的详细一点吗?C#初学者,给个示例吧,非常感谢
    2009年11月2日 8:44
  • 顺便问一下版主:除了在网址后面加“?随机码”的办法之外,如何避免从缓存中获取网页信息?
    2009年11月2日 9:27
  • /// <summary>
    /// Requests <paramref name="uri"/> and returns the response body decoded with
    /// Encoding.Default, i.e. the machine's ANSI code page. Pages in any other encoding
    /// (for example UTF-8 on a GBK system) will come back garbled.
    /// </summary>
    /// <param name="uri">Address to request.</param>
    /// <param name="method">Request method, e.g. "GET".</param>
    /// <returns>The response text, or "获取失败!" if any exception is thrown or the
    /// response is not an HTTP response.</returns>
    public string gethttpcode(string uri, string method)
    {
      try
      {
        WebRequest req = WebRequest.Create(uri);
        req.Method = method;
        req.Timeout = 10000;

        // Dispose the response even when it turns out not to be an HTTP response.
        using (WebResponse rawResponse = req.GetResponse())
        {
          HttpWebResponse res = rawResponse as HttpWebResponse;
          if (res == null)
          {
            return "获取失败!";
          }

          using (Stream receiveStream = res.GetResponseStream())
          using (StreamReader sr = new StreamReader(receiveStream, Encoding.Default))
          {
            return sr.ReadToEnd();
          }
        }
      }
      catch
      {
        // Best-effort contract: every failure is reported through the return value.
        return "获取失败!";
      }
    }


    【孟子E章】
    2009年11月2日 13:28
    版主
  • 本来是UTF8网页正常,GB2312的网页是乱码。用了版主的代码,GB2312正常了,UTF8的网页成乱码了...郁闷...
    2009年11月5日 10:38
  • 你需要先得到网页编码。下面这段代码可以解决大部分的网页
      // Demo click handler: downloads each sample address through GetWebPage, puts the
      // returned text into textBox1, then shows the address in a message box before moving on.
      private void button3_Click(object sender, EventArgs e)
        {
          // Sample addresses used to exercise GetWebPage.
          string[] samplePages = new string[]
          {
            "http://www.kbs.co.kr/",
            "http://rosemary.kbs.co.kr/",
            "http://sbcx.saic.gov.cn/trade/index.jsp",
            "http://www.csdn.net",
            "http://www.google.cn/",
            "http://www.baidu.com",
            "http://www.javaeye.com/",
            "http://blog.163.com/kel_scott66/blog/static/1150539632009614115635700/",
            "http://www.sina.com.hk/",
            "http://www.rthk.org.hk/"
          };

          for (int index = 0; index < samplePages.Length; index++)
          {
            string address = samplePages[index];
            string pageText = GetWebPage(address, "GET");
            textBox1.Text = pageText;
            MessageBox.Show(address);
          }
        }
    
        /// <summary>
        /// Downloads <paramref name="uri"/> once and decodes the body with the character set
        /// the page declares, so pages that are not UTF-8 (GBK, Big5, ...) are not garbled.
        /// </summary>
        /// <param name="uri">Absolute HTTP address to request.</param>
        /// <param name="method">HTTP method, e.g. "GET".</param>
        /// <returns>The decoded body, or "获取失败!" if any exception is thrown.</returns>
        public string GetWebPage(string uri, string method)
        {
          try
          {
            HttpWebRequest req = (HttpWebRequest)WebRequest.Create(uri);
            req.Method = method;
            req.Timeout = 10000;
            req.UserAgent = "Mozilla/5.0 (Windows; U; Windows NT 5.2; zh-CN; rv:1.9.1.4) Gecko/20091016 Firefox/3.5.4 (.NET CLR 3.5.30729)";

            byte[] body;
            string contentType;
            string characterSet;

            // Buffer the raw bytes so the page is requested only once; the same bytes are
            // decoded a second time after the charset is known.
            using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
            using (Stream receiveStream = res.GetResponseStream())
            {
              contentType = res.ContentType;
              characterSet = res.CharacterSet;
              body = ReadToEndBytes(receiveStream);
            }

            // Charset declarations are plain ASCII, so a provisional UTF-8 decode is enough
            // to find them even when the rest of the text is in another encoding.
            string sniffedText = DecodeBytes(body, Encoding.UTF8);
            Encoding htmlEncoding = ChooseEncoding(sniffedText, contentType, characterSet);
            return DecodeBytes(body, htmlEncoding);
          }
          catch
          {
            // Best-effort contract: every failure is reported through the return value.
            return "获取失败!";
          }
        }

        // Copies the remainder of a stream into a byte array.
        private static byte[] ReadToEndBytes(Stream input)
        {
          using (MemoryStream buffer = new MemoryStream())
          {
            byte[] chunk = new byte[8192];
            int read;
            while ((read = input.Read(chunk, 0, chunk.Length)) > 0)
            {
              buffer.Write(chunk, 0, read);
            }
            return buffer.ToArray();
          }
        }

        // Decodes bytes through a StreamReader so a leading byte-order mark is honoured and stripped.
        private static string DecodeBytes(byte[] bytes, Encoding encoding)
        {
          using (MemoryStream source = new MemoryStream(bytes))
          using (StreamReader reader = new StreamReader(source, encoding))
          {
            return reader.ReadToEnd();
          }
        }

        // Picks the first usable charset: in-document declaration, then the Content-Type
        // header, then HttpWebResponse.CharacterSet; falls back to Encoding.Default.
        // The Content-Encoding header is deliberately not consulted: it names a compression
        // scheme (gzip, deflate), not a character set.
        private static Encoding ChooseEncoding(string html, string contentType, string characterSet)
        {
          string[] candidates = new string[]
          {
            ExtractCharset(html),
            ExtractCharset(contentType),
            characterSet
          };

          foreach (string name in candidates)
          {
            Encoding resolved = TryGetEncoding(name);
            if (resolved != null)
            {
              return resolved;
            }
          }
          return Encoding.Default;
        }

        // Returns the value of the first "charset=" declaration in the text, or "" if none.
        // Accepts quoted, unquoted and HTML5 (<meta charset="...">) forms, case-insensitively.
        private static string ExtractCharset(string text)
        {
          if (string.IsNullOrEmpty(text))
          {
            return "";
          }
          Match declared = Regex.Match(text, @"charset\s*=\s*[""']?\s*(?<charset>[A-Za-z0-9_\-:.]+)", RegexOptions.IgnoreCase);
          return declared.Success ? declared.Groups["charset"].Value : "";
        }

        // Maps a charset name to an Encoding; returns null for empty or unrecognised names
        // so the caller can try the next candidate instead of failing the whole fetch.
        private static Encoding TryGetEncoding(string name)
        {
          if (string.IsNullOrEmpty(name))
          {
            return null;
          }
          try
          {
            return Encoding.GetEncoding(name.Trim());
          }
          catch (ArgumentException)
          {
            return null;
          }
          catch (NotSupportedException)
          {
            return null;
          }
        }


    2009年11月5日 12:37
    版主