locked
Troubleshooting App Hang on Screen Scrape App RSS feed

  • Question

  • User1164381444 posted

    I'm working on an app that screen scrapes some local crime data from a website.  Eventually I'd like to geo-locate the data and alert myself to new entries in my neighborhood.

    Unfortunately the site requires that I agree not to hold the provider responsible if the data turns out to be wrong.

    I arrived at the following code to attempt to get past the agreement so I can move on to parsing the data for my neighborhood:

    	    string url = "http://www.columbuspolice.org/Reports";
                string postData = "btnAgree=I%20Understand";
                string urlAgree = "http://www.columbuspolice.org/Reports/PubDefault.aspx" + "?" + postData;
                
                CookieContainer cookieContainer = new CookieContainer();
    
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.CookieContainer = cookieContainer;
    
                HttpWebResponse response = (HttpWebResponse)request.GetResponse();
                Stream responseStream = response.GetResponseStream();
                StreamReader reader = new StreamReader(responseStream);
                Response.Write(reader.ReadToEnd());
                Response.Write("<hr />"); // Separate page 1 results from page 2
    
                HttpWebRequest agreeRequest = (HttpWebRequest)WebRequest.Create(urlAgree);
                agreeRequest.CookieContainer = cookieContainer;
                // Add returned cookies to CookeContainer
                foreach (Cookie cook in response.Cookies)
                {
                    agreeRequest.CookieContainer.Add(cook);
                }
                agreeRequest.Method = WebRequestMethods.Http.Post;
                agreeRequest.ContentType = "application/x-www-form-urlencoded";
                agreeRequest.ContentLength = postData.Length;
    
                try
                {
                    HttpWebResponse agreeResponse = (HttpWebResponse)agreeRequest.GetResponse();
                    Stream agreeResponseStream = agreeResponse.GetResponseStream();
                    StreamReader agreeReader = new StreamReader(agreeResponseStream);
    
                    Response.Write(agreeReader.ReadToEnd());
                }
                catch (Exception error) {
                    Console.WriteLine("{0} Exception caught.", error);
                }
    It hangs when it gets to the "try-catch" block. It'd be nice if I could get some kind of error explaining why it's hanging but I just get a page with a "loading" bar at the bottom in IE8.
    I'm going to poke around with Fiddler - maybe I'm passing the wrong info, or passing it to the wrong page... but insights on how to troubleshoot this would be appreciated.
    Friday, July 24, 2009 2:29 PM

Answers

  • User-1568287205 posted

    You're telling the web request to expect a specific amount of POST data, but not sending it that data, so it keeps waiting for the data. Also, Console.WriteLine is not a good idea in a web app. :-) Here's a revision of the code:

    //** To replace your original code
    
    string url = "http://www.columbuspolice.org/Reports";  
    string postData = "btnAgree=I%20Understand";  
    string urlAgree = "http://www.columbuspolice.org/Reports/PubDefault.aspx" + "?" + postData;  
       
    var cookieContainer = new CookieContainer();
    
    UrlResponse response = RequestUrl(url,null,cookieContainer,null);
    Response.Write(response.ResponseText);  
    Response.Write("<hr />"); // Separate page 1 results from page 2  
    
    
    // Add returned cookies to CookeContainer  
    foreach (Cookie cook in response.Cookies)  
        cookieContainer.Add(cook);  
    
    UrlResponse agreeResponse = RequestUrl(urlAgree,postData,cookieContainer,"application/x-www-form-urlencoded");
    Response.Write(agreeResponse.ResponseText);  
    
    
    //** Helper method to make web requests for you
    
    UrlResponse RequestUrl(string url, string postBody, CookieContainer cookies, string contentType)
    {
    
        var httpRequest = (HttpWebRequest)WebRequest.Create(url);
        if(cookies!=null)
            httpRequest.CookieContainer = cookies;
    
        //POST request
        if (postBody != null)
        {
            httpRequest.Method = "POST";
            
            //this stuff is to prevent a known issue where in some cases the sockets will keep hanging around
            httpRequest.KeepAlive = false;
            httpRequest.ConnectionGroupName = Guid.NewGuid().ToString();
            httpRequest.ProtocolVersion = HttpVersion.Version10;
    
            //send the POST data
            httpRequest.ContentType = contentType;
            byte[] data = Encoding.UTF8.GetBytes(postBody);
            httpRequest.ContentLength = data.Length;
            using(reqStream = httpRequest.GetRequestStream())
                reqStream.Write(data, 0, data.Length);
        }
        else
            httpRequest.Method = "GET";
    
        using (var response = httpRequest.GetResponse())
        {
            using (var sReader = new StreamReader(response.GetResponseStream()))
            {
                return new UrlResponse(sReader.ReadToEnd(), response.Cookies.OfType<Cookie>().ToArray());
            }
        }
    }
    
    //** Container class to hold both the response text and the response cookies
    
    class UrlResponse
    {
        public UrlResponse(string response, Cookie[] cookies)
        {
            ResponseText = response;
            Cookies = cookies;
        }
    
        public string ResponseText{get;set;}
        public Cookie[] Cookies{get;set;}
    }

    The code is untested, but it should work.

    • Marked as answer by Anonymous Thursday, October 7, 2021 12:00 AM
    Saturday, July 25, 2009 5:32 AM