none
C# HttpClient.GetAsync returns 404 Not Found. RRS feed

  • Question

  • Hi, I'm trying to scrape a web page in order to convert it to PDF. I tried several DLLs, including the ones native to .NET, but I always get a 404 Not Found. However, when I put the URL in a browser, the page appears.

     /// <summary>
     /// Downloads the statement-account page so that it can later be saved as a PDF.
     /// The PDF step itself is still a TODO; only the download is performed here.
     /// </summary>
     public void ConvertPDF()
     {
         // "id" stands for a Guid that comes from the DB.
         // The literal must be closed with a plain ASCII double quote; a typographic
         // closing quote leaves the string unterminated and the method does not compile.
         var url = "http://localhost:4200/statementaccount/id";

         WebClientService httpClient = new WebClientService(url);

         ResponseWebClientModel response = httpClient.RequestGet(url);

         // Reported behaviour: the request always comes back as 404, so response.error is set.
         if (!response.error)
         {
             // Reported behaviour: execution never reaches this branch.
             var content = response.content;

             // TODO: save the response to PDF.
         }
     }
    
    
    /// <summary>
    /// Issues a GET request for <paramref name="url"/>, blocks until it completes, and
    /// returns the outcome as a <c>ResponseWebClientModel</c>.
    /// </summary>
    /// <param name="url">Address to request.</param>
    /// <param name="setCookies">
    /// When <c>true</c> (and <paramref name="urlcoockie"/> is non-empty), the response's
    /// <c>Set-Cookie</c> headers are inspected for cookies not yet in the cookie container.
    /// </param>
    /// <param name="urlcoockie">URI whose stored cookies are compared against the response cookies.</param>
    /// <param name="urlcache">
    /// URI under which missing cookies are stored. When empty, nothing is added to the container.
    /// </param>
    /// <returns>
    /// The populated response. On any exception (including a non-success status code),
    /// <c>error</c> is set to <c>true</c> and <c>content</c> holds the exception message.
    /// </returns>
    public ResponseWebClientModel RequestGet(string url, bool setCookies = false, string urlcoockie = "", string urlcache = "")
    {
        response = new ResponseWebClientModel();

        // The method is synchronous by contract, so the continuation is awaited with Wait().
        HttpClient.GetAsync(url).ContinueWith((task, o) =>
        {
            try
            {
                // Dispose the response message once its content has been copied out.
                using (var responseAsync = task.Result)
                {
                    // Throws for any non-2xx status (e.g. 404); handled by the catch below.
                    responseAsync.EnsureSuccessStatusCode();

                    response.refererUrl = responseAsync.RequestMessage.RequestUri.ToString();
                    response.content = responseAsync.Content.ReadAsStringAsync().Result;

                    if (setCookies && urlcoockie != "")
                    {
                        var uri = new Uri(urlcoockie);
                        var cookieCollection = _CookieContainer.GetCookies(uri);

                        // TryGetValues instead of GetValues: a response without any
                        // Set-Cookie header must not be reported as a failed request.
                        IEnumerable<string> cookiesHeaders;
                        if (responseAsync.Headers.TryGetValues("Set-Cookie", out cookiesHeaders))
                        {
                            var missingCookies = cookiesHeaders.Where(val =>
                                !cookieCollection.Cast<Cookie>().Any(c => val.StartsWith($"{ c.Name}=", StringComparison.Ordinal)));

                            if (urlcache != "")
                            {
                                // When a cache URI is given, "missing" is judged against its cookies instead.
                                var cookieCollection2 = _CookieContainer.GetCookies(new Uri(urlcache));

                                missingCookies = cookiesHeaders.Where(val =>
                                    !cookieCollection2.Cast<Cookie>().Any(c => val.StartsWith($"{ c.Name}=", StringComparison.Ordinal)));
                            }

                            foreach (var missingCookie in missingCookies)
                            {
                                // Split on the first '=' only, so values that contain '=' stay intact.
                                var keyValue = missingCookie.Split(new[] { '=' }, 2);
                                if (keyValue.Length < 2)
                                {
                                    // Header without a name=value pair; nothing to store.
                                    continue;
                                }

                                var cookieName = keyValue[0];

                                // Drop the attributes (Path, Expires, ...) that follow the first ';'.
                                var value = keyValue[1].Split(';')[0];

                                if (urlcache != "")
                                {
                                    _CookieContainer.Add(new Uri(urlcache),
                                        new Cookie(cookieName, value));
                                }
                            }
                        }
                    }
                }
            }
            catch (Exception e)
            {
                response.error = true;
                response.content = e.Message;
            }
        }, null).Wait();

        return response;
    }

    <style type="text/css">p.p1 {margin: 0.0px 0.0px 0.0px 0.0px; font: 12.0px 'Helvetica Neue'} p.p2 {margin: 0.0px 0.0px 0.0px 0.0px; font: 12.0px 'Helvetica Neue'; min-height: 14.0px} span.Apple-tab-span {white-space:pre} </style>
    Thursday, May 2, 2019 3:47 PM

All replies

  • A 404 can mean that the website cannot be found, or that a REST service is reporting that it cannot find the requested item.  What URL are you hitting?  Could the website be expecting a header before it lets you view or get the HTML?  If you are trying to scrape the HTML, why not just use GetStringAsync?

         HttpClient client = new HttpClient();

             var html = await client.GetStringAsync("https://microsoft.com");

    Friday, May 3, 2019 12:27 AM