none
[IE编程]MIME Filter不能稳定工作?请大虾指点! RRS feed

  • 问题

  • 环境:VS2005 C# WinForm XP SP2 IE8

    IE异步可插入协议支持两种扩展,用代码说话就是:

    IInternetSession.RegisterNameSpace
    IInternetSession.RegisterMimeFilter

    第一种基于协议名称比如"http"。这种扩展我很早就将之应用于程序,实现了直接从内存加载网页。一直以来都非常稳定可靠。

    最近做网页过滤,想采用MimeFilter。因为它是针对网页数据类型的。此外,NameSpace下webBrowser只向协议索取数据,MimeFilter则是webBrowser先向协议提供数据,用户处理后再返给webBrowser。所以我认为MimeFilter适合用来做网页内容过滤。主要针对html和js数据。

    但令人失望的是,尽管我修改了上百次代码,MimeFilter还是不稳定。具体表现为:

    1 MimeFilter提供的数据可能不完整。(比如说它没给出<html>标签而直接从<body>开始了,甚至<body>也没有直接从文字开始了)
    2 无法准确判定MimeFilter何时提供完毕数据。
    3 偶尔会出现未处理的win32内存错误,C#中无法捕获,也无法确定源码位置。

    1和3都是致命的。请教下大虾,是我代码有问题,还是MimeFilter本身就不稳定?另外,有没有C#或者VC的MIME Filter源码可供参考?网上只有一个凤之焚的VC源码,但运行后N多异常...

    下面我贴下我的关键部分代码:

      [ComVisible(true)]
      [Guid("1c470f4b-0486-4558-80c7-ad08d652a465"), ClassInterface(ClassInterfaceType.AutoDispatch)]
      public class MimeHandler : IInternetProtocol, IInternetProtocolSink//,IInternetProtocolRoot
      {
        #region 全局对象
        private string cacheFileName = "";
    
        private string url = "";
    
        private MemoryStream dataStream = new MemoryStream();
    
        private IInternetProtocol urlMonProtocol;
    
        private IInternetProtocolSink urlMonProtocolSink;
    
        private byte[] buffer = null;
        private uint written = 0;
        private long totalSize = 0;
    
        private string MimeType = "";
        private string char_set = "";
        #endregion
    
        /** MIME类型
        MIME type 
        text/html;
        text/css
        application/x-javascript(试验性质)
        application/javascript
        text/javascript(已过时)
        **/
        public void Start(string szURL, IInternetProtocolSink Sink,
          IInternetBindInfo pOIBindInfo, uint grfPI, uint dwReserved)
        {
            //作为MIME过滤对象,szUrl传入的是MIME的类型
            //(如果是name space handlers对象,则该参数为一个即将下载或解析的URL)。
            //在Start方法中,我们必需做的实际上只是保存urlmon.dll提供的IInternetProtocolSink
            //和IInternetProtocol接口。
    
            if (Sink == null)
            { throw new COMException("", E_POINTER); }
    
            //获取网址
            uint Fetched = 0;
            const int BINDSTRING_URL = 14;
    
            MimeType = szURL;
    
            pOIBindInfo.GetBindString(BINDSTRING_URL, ref url, 1, ref Fetched);
    
            urlMonProtocol = (IInternetProtocol)Sink;
            urlMonProtocolSink = Sink;
    
            //ReportData可能会被调用好几次,每次提交一部分数据。
            dataStream.Close();
            dataStream = new MemoryStream();
    
            throw new COMException("", S_OK);
        }
    
        public void ReportProgress(uint ulStatusCode, string szStatusText)
        {
          //作为MIME过滤器,ulStatusCode一般都是BINDSTATUS_CACHEFILENAMEAVAILABLE , 
          //当ulStatusCode为BINDSTATUS_CACHEFILENAM EAVAILABLE时,szStatusText为临时
          //缓存文件的路径名称,但有一些网页并不写到缓存里,所以szStatusText可能为空字符串。
    
          uint bindStatus = Convert.ToUInt32(BINDSTATUS.BINDSTATUS_CACHEFILENAMEAVAILABLE);
    
          if (ulStatusCode == bindStatus)
          {
            cacheFileName = szStatusText;
          }
    
          urlMonProtocolSink.ReportProgress(ulStatusCode, szStatusText);
          throw new COMException("", S_OK);
        }
    
        public void ReportData(BSCF grfBSCF, uint ulProgress, uint ulProgressMax)
        {
          try
          {
            //ulProgressMax有时一直为0
    
            UInt32 resultOfRead;
            //获取数据 - 处理数据 -交回数据
    
            //IE下载文件过程中或下载完毕时会调用MIME过滤器的ReportData方法,ulProgressMax为文件总是数据量,
            //ulProgress为下载进度,理论上当文件全部下载完后,ulProgress应等于ulProgressMax(实际上,当网页
            //文件不是很大时,即使ulProgress不等于ulProgressMax时,文件也可能全部下载下来),还有一个反应文
            //件下载情况的参数是grfBSCF。有时,ReportData方法会被Web处理器调用多次。 
            //ReportData是过滤网页内容或修改网页内容比较合适的地方。在此地,可以将网页内容通过调用Read保存
            //到自已的缓存或流中并做适当的处理(注意检查字符的编码)。
            //最后,别忘了调用Web处理器的IInternetProtocolSink::ReportData方法,向它汇报数据下载的情况。
            //Web处理器得到此通知后,就会调用MIME过滤器的IInternetProtocol::Read,此时,你就可以将修改后的
            //数据交给WEB处理器。
    
            //所有可能的返回结果
            //There is no more data available from the server, but more data was expected.
            uint INET_E_DATA_NOT_AVAILABLE = 0x800C0007;
    
            //The read failed.
            uint INET_E_DOWNLOAD_FAILURE = 0x800C0008;
    
            //The read operation is pending.
            uint E_PENDING = 0x8000000A;
    
            //The read was successful, but there is still additional data available.
            uint S_OK = 0x00000000;
    
            //All of the data has been completely downloaded.
            uint S_FALSE = 0x00000001;
            uint totalRead = 0;
    
            do
            {
              byte[] readBuffer = new byte[8192];
              
              int size = readBuffer.Length;
              IntPtr unmanagedPointer = Marshal.AllocHGlobal(size);//win32地址
    
              Marshal.Copy(readBuffer, 0, unmanagedPointer, readBuffer.Length);//初始化win32内存
    
              // Call unmanaged code 
    
              //这里并不知道有多少数据需要下载,给IE个内存空间让其往里放数据,根据返回值判断是否下载完毕。
              resultOfRead = urlMonProtocol.Read(unmanagedPointer, Convert.ToUInt32(readBuffer.Length), out totalRead);
              //add(Convert.ToString((int)resultOfRead,16));
    
              if (resultOfRead == S_OK)
              {
                Marshal.Copy(unmanagedPointer, readBuffer, 0, 8192);
                dataStream.Write(readBuffer, 0, Convert.ToInt32(totalRead));
              }
    
              //free memory 
              Marshal.FreeHGlobal(unmanagedPointer);
    
            } while ((resultOfRead != S_FALSE) && (resultOfRead != INET_E_DOWNLOAD_FAILURE) && (resultOfRead != INET_E_DATA_NOT_AVAILABLE));
            //while (resultOfRead == S_OK);
    
            //由于ReportData可能会被调用多次,故没法准确判定什么时候数据提供完毕,只能这样一次次提交。
            //ulProgressMax根本不可靠,它有时一直是0.这样的后果是js在前几次提交过程中由于不完整会报错误。
            if (resultOfRead == S_FALSE && HandleData())
            {
              urlMonProtocolSink.ReportData(//交回数据
                BSCF.BSCF_FIRSTDATANOTIFICATION | BSCF.BSCF_LASTDATANOTIFICATION | BSCF.BSCF_DATAFULLYAVAILABLE,
                Convert.ToUInt32(totalSize), Convert.ToUInt32(totalSize));
    
              urlMonProtocolSink.ReportResult(Convert.ToInt32(HRESULT.S_OK), HRESULT.S_OK, null);
            }
          }
          catch(Exception ex)
          {}
        }
    
        private bool HandleData()
        {
          try
          {
    
            byte[] data = new byte[(int)dataStream.Length];
            long pos = dataStream.Position;
            dataStream.Seek(0, SeekOrigin.Begin);
            dataStream.Read(data, 0, data.Length);
            dataStream.Seek(pos, SeekOrigin.Begin);
             
            string temp = "";
            string content = BytesToString(data, out temp); 
            if(content.Length == 0)
            {return false;}
            content = content.Replace("体育", "踢鱼");
        
            buffer = StringToBytes(content, temp);
            written = 0;
            totalSize = buffer.Length;
            return true;
          }
          catch(Exception ex)
          {return false;}
        }
    
        //IE读取你交回的数据
        public UInt32 Read(System.IntPtr pv, uint cb, out uint pcbRead)
        { 
          try
          {
            pcbRead = 0;
    
            if (written < totalSize)
            {
              if (totalSize - written <= cb)
              {
                Marshal.Copy(buffer, (int)written, pv, (int)(totalSize - written));
                pcbRead = (uint)(totalSize - written);
                written += (uint)(totalSize - written);
                return (UInt32)S_FALSE;
              }
              else
              {
                Marshal.Copy(buffer, (int)written, pv, (int)cb);
                written += cb;
                pcbRead = cb;
                return (UInt32)S_OK;
              }
            }
            else
            {
              return (UInt32)S_FALSE;
            }
          }
          catch(Exception ex)
          {
    
            pcbRead = 0;
            return (UInt32)S_FALSE;
          }
        }
        
        public void Resume()
        {
          urlMonProtocol.Resume();
          //throw new COMException("", unchecked((int)0x80004001));
        }
    
        public void Terminate(uint dwOptions)
        {
          urlMonProtocol.Terminate(dwOptions);
          //throw new COMException("", S_OK);
        }
    
        public void Seek(_LARGE_INTEGER dlibMove, uint dwOrigin, out _ULARGE_INTEGER plibNewPosition)
        {
          urlMonProtocol.Seek(dlibMove, dwOrigin, out plibNewPosition);
          //throw new COMException("", S_OK);
        }
    
        public void LockRequest(uint dwOptions)
        {
          urlMonProtocol.LockRequest(dwOptions);
          //throw new COMException("", S_OK);
        }
    
        public void UnlockRequest()
        {
          urlMonProtocol.UnlockRequest();
          //throw new COMException("", S_OK);
        }
    
        public void Abort(int hrReason, uint dwOptions)
        {
          urlMonProtocol.Abort(hrReason, dwOptions);
          //throw new COMException("", S_OK);
        }
    
        public void Suspend()
        {
          urlMonProtocol.Suspend();
          //throw new COMException("", unchecked((int)0x80004001));
        }
    
        public void Continue(ref _tagPROTOCOLDATA pProtocolData)
        {
          urlMonProtocol.Continue(ref pProtocolData);
          //throw new COMException("", S_OK);
        }
        
        public void Switch(ref _tagPROTOCOLDATA pProtocolData)
        {
          urlMonProtocolSink.Switch(ref pProtocolData);
          //throw new COMException("", S_OK);
        }
    
        public void ReportResult(int hrResult, uint dwError, string szResult)
        {
          urlMonProtocolSink.ReportResult(hrResult, dwError, szResult);
          //throw new COMException("", S_OK);
        }
    
        #region 辅助函数
        public static IHttpNegotiate GetHttpNegotiate(IInternetProtocolSink Sink)
        { 
    
           if ((Sink is IServiceProvider) == false) 
    
             throw new Exception("Error ProtocolSink does not support IServiceProvider."); 
      
           IServiceProvider Provider = (IServiceProvider) Sink; 
    
           object obj_Negotiate = new object(); 
    
           Provider.QueryService(ref Guids.IID_IHttpNegotiate, ref Guids.IID_IHttpNegotiate, out obj_Negotiate); 
    
           return (IHttpNegotiate) obj_Negotiate; 
         } 
    
        public static BINDINFO GetBindInfo(IInternetBindInfo pOIBindInfo)
        { 
          BINDINFO BindInfo = new BINDINFO(); 
    
          BindInfo.cbSize = (UInt32) Marshal.SizeOf(typeof (BINDINFO)); 
    
          UInt32 AsyncFlag; 
    
          pOIBindInfo.GetBindInfo(out AsyncFlag, ref BindInfo); 
    
          return BindInfo; 
        } 
    
        private string BytesToString(byte[] data,out string charSet)
        {
          charSet = "";
          string sHtml = Encoding.Default.GetString(data);
          string CharSet = "";
    
          Regex rg = new Regex("<meta.+?charset=(?<name>.*?)\".*?>", RegexOptions.IgnoreCase);
          Match m = rg.Match(sHtml);
    
          if(m.Success)
          {
            CharSet = m.Groups["name"].Value;
            charSet = CharSet;
          }
    
          if (CharSet.Length > 0)
          {
            sHtml = Encoding.GetEncoding(CharSet).GetString(data);
          }
    
          return sHtml;
        }
    
        private byte[] StringToBytes(string text,string charSet)
        {
          if (charSet.Length > 0)
          { return Encoding.GetEncoding(charSet).GetBytes(text); }
          else
          { return Encoding.Default.GetBytes(text); }
        }
        #endregion
      }
    

     

     

     

     

     

     

     

    • 已移动 Aspen VJ 2011年7月4日 3:22 (发件人:Visual C#)
    2011年6月29日 15:28