How to resolve the performance problem?

  • Question

  • Hi,

         I have developed an application for crawling URLs. It works fine at first.

        But as the application keeps running, it gradually slows down, and after 10 or 15 minutes it gets stuck.

    How can I resolve this problem?

    Here is part of the code:

      private void BackgroundWorker_DoWork(object sender, DoWorkEventArgs e)
            {
                // The argument is the URL with a crawl-level digit appended;
                // the last two characters are stripped off to recover the URL.
                List<List<string>> domainsUrlsEmails = new List<List<string>>();
                HttpWebResponse response = null;
                StreamReader readStream = null;

                try
                {
                    string argument = e.Argument.ToString();
                    string url = argument.Substring(0, argument.Length - 2);

                    HttpWebRequest request;
                    if (!url.StartsWith("http://") && !url.StartsWith("ftp://"))
                        request = (HttpWebRequest)WebRequest.Create("http://" + url);
                    else
                        request = (HttpWebRequest)WebRequest.Create(url);

                    // Set some reasonable limits on resources used by this request.
                    request.MaximumAutomaticRedirections = 4;
                    request.MaximumResponseHeadersLength = 4;

                    // Set credentials to use for this request.
                    request.Credentials = CredentialCache.DefaultCredentials;

                    if (!stop)
                    {
                        response = (HttpWebResponse)request.GetResponse();

                        // Pipe the response stream into a reader with the required encoding.
                        readStream = new StreamReader(response.GetResponseStream(), Encoding.UTF8);
                        string text = readStream.ReadToEnd();

                        // Counting the number of URLs crawled.
                        Debug.WriteLine("urlsCrawled : " + urlsCrawled);
                        domainsUrlsEmails = regex.GetDomainsUrlsEmails(text);
                    }

                    // Skip the bookkeeping when the request was skipped (stop == true)
                    // and nothing was extracted.
                    if (domainsUrlsEmails.Count >= 3)
                    {
                        domains.AddRange(domainsUrlsEmails[0]);
                        emails.AddRange(domainsUrlsEmails[2]);

                        if (maximumUrlsToExtract > urlsCrawled)
                        {
                            // The last character of the argument is the crawl level;
                            // enqueue the extracted URLs for the next level.
                            List<string> urls = domainsUrlsEmails[1];
                            switch (argument.Substring(argument.Length - 1, 1))
                            {
                                case "0":
                                    for (int i = 0; i < urls.Count; i++)
                                        urlsLevel1.Enqueue(urls[i]);
                                    break;
                                case "1":
                                    for (int i = 0; i < urls.Count; i++)
                                        urlsLevel2.Enqueue(urls[i]);
                                    break;
                                case "2":
                                    for (int i = 0; i < urls.Count; i++)
                                        urlsLevel3.Enqueue(urls[i]);
                                    break;
                                case "3":
                                    for (int i = 0; i < urls.Count; i++)
                                        urlsLevel4.Enqueue(urls[i]);
                                    break;
                                case "4":
                                    for (int i = 0; i < urls.Count; i++)
                                        urlsLevel5.Enqueue(urls[i]);
                                    break;
                                case "5":
                                    for (int i = 0; i < urls.Count; i++)
                                        urlsLevel6.Enqueue(urls[i]);
                                    break;
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    Debug.WriteLine(" DoWork : " + ex.Message);
                }
                finally
                {
                    // Always release the reader and response; without this, any
                    // request that throws leaks its connection, and leaked
                    // connections slowly exhaust the connection pool and memory.
                    if (readStream != null)
                        readStream.Close();
                    if (response != null)
                        response.Close();
                }
            }


    Please help me.

    Monday, April 21, 2008 4:06 AM

Answers

  • It seems from the code that you're building some sort of collection (possibly several of them) containing the information you're looking for.  It's possible that these collections are being filled with objects that are never removed, so your memory usage keeps climbing until you get an OutOfMemoryException.  The slowness before the application finally crashes is probably caused by the large amount of garbage collection going on.
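
    A quick way to confirm that diagnosis (a sketch, not from the original reply; LogMemoryStats is a made-up helper) is to log the managed heap size and the per-generation collection counts from a timer while the crawler runs:

          // requires: using System; using System.Diagnostics;
          private static void LogMemoryStats()
          {
              // GC.GetTotalMemory(false) reports the currently allocated managed
              // bytes without forcing a collection. Steadily climbing bytes plus
              // a fast-growing gen-2 count suggest objects that stay reachable
              // (e.g. ever-growing lists and queues) rather than garbage that
              // the GC could reclaim.
              Debug.WriteLine(string.Format(
                  "managed bytes: {0}, gen0: {1}, gen1: {2}, gen2: {3}",
                  GC.GetTotalMemory(false),
                  GC.CollectionCount(0),
                  GC.CollectionCount(1),
                  GC.CollectionCount(2)));
          }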

     

    If you don't need all the data you're accumulating, consider clearing the collections once in a while.  If you do need it, write it to some kind of persistent storage and then discard it.  Finally, if the memory consumption doesn't stem from that part of the code, you might find my post on diagnosing .NET memory leaks useful:

     

    http://blogs.microsoft.co.il/blogs/sasha/archive/2008/04/08/next-generation-production-debugging-demo-2-and-demo-3.aspx
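
    For instance, here is a minimal sketch of the "persist and discard" suggestion above (the threshold, file name, and FlushEmails name are all illustrative; emails is the List<string> from the code in the question, so guard it with a lock if several workers touch it):

          // requires: using System.Collections.Generic; using System.IO;
          private void FlushEmails()
          {
              const int flushThreshold = 10000;   // made-up limit; tune as needed
              if (emails.Count < flushThreshold)
                  return;

              // Append the batch to persistent storage, then drop it from
              // memory so the list cannot grow without bound.
              using (StreamWriter writer = File.AppendText("emails.txt"))
              {
                  foreach (string email in emails)
                      writer.WriteLine(email);
              }
              emails.Clear();
          }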

     

    Sasha

    Monday, April 21, 2008 8:27 AM