O código a seguir lê páginas da internet, mas as tags HTML geram muito lixo na saída. Como resolver isso? Como imprimir o texto sem as tags?
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace HttpResponse
{
    /// <summary>
    /// Console program that downloads a web page, removes its HTML markup and
    /// prints the remaining text one word per line, then prints how many of
    /// those words are exactly "Brasil".
    /// </summary>
    class Program
    {
        // Matches elements whose content is not visible text (script and style
        // blocks) as well as HTML comments. Singleline lets '.' span newlines.
        private static readonly Regex NonTextBlocks = new Regex(
            @"<(script|style)\b[^>]*>.*?</\1\s*>|<!--.*?-->",
            RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // Matches any remaining opening, closing or self-closing tag.
        private static readonly Regex AnyTag = new Regex(@"<[^>]+>");

        /// <summary>
        /// Fetches the page, prints its visible words and the "Brasil" count,
        /// then waits for a key press before exiting.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            string url = "http://www.uol.com.br";
            string html;

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

            // The using blocks release the connection and the reader even when
            // reading throws, and before the program blocks on ReadKey.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (Stream receiveStream = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(receiveStream, Encoding.UTF8))
            {
                Console.WriteLine("\r\nFluxo de resposta recebida.");

                // Read the whole body at once: splitting fixed-size chunks
                // separately would cut words (and tags) at chunk boundaries.
                html = reader.ReadToEnd();
            }

            Console.WriteLine("HTML...\r\n");

            int cont = 0;
            foreach (string word in ExtractWords(html))
            {
                Console.WriteLine(word);
                if (string.Equals(word, "Brasil", StringComparison.Ordinal))
                {
                    cont++;
                }
            }

            Console.Write(cont);
            Console.ReadKey();
        }

        /// <summary>
        /// Removes script/style blocks, comments and tags from <paramref name="html"/>,
        /// decodes HTML entities and splits the remaining text on whitespace.
        /// </summary>
        /// <param name="html">Raw HTML document; may be null or empty.</param>
        /// <returns>The non-empty words of the visible text, in document order.</returns>
        private static string[] ExtractWords(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return new string[0];
            }

            // Replace markup with a space (not an empty string) so that words
            // separated only by a tag, e.g. "a</p><p>b", do not get glued together.
            string withoutBlocks = NonTextBlocks.Replace(html, " ");
            string withoutTags = AnyTag.Replace(withoutBlocks, " ");

            // Turn entities such as &amp; or &nbsp; into the characters they represent.
            string text = WebUtility.HtmlDecode(withoutTags);

            // A null separator array makes Split use every whitespace character.
            return text.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
        }
    }
}