本文实例总结了三种常用的C#网页信息采集方法。分享给大家供大家参考。具体实现方法如下:
一、通过 HttpWebResponse 来获取
/// <summary>
/// Requests <paramref name="url"/> over HTTP with the current default
/// credentials and returns the beginning of the response body as text.
/// </summary>
/// <param name="url">Absolute URL of the page to fetch.</param>
/// <returns>
/// At most the first 4000 characters of the response body. Shorter bodies are
/// returned at their real length, with no padding characters.
/// </returns>
/// <exception cref="ApplicationException">
/// Thrown with an "HTTP 403" message when creating or sending the request
/// fails for any reason, and with an "HTTP 404" message when reading the
/// response fails for any reason. The original exception is kept as
/// <c>InnerException</c>; the status text in the message is fixed and does not
/// reflect the actual cause.
/// </exception>
public static string CheckTeamSiteUrl(string url)
{
    HttpWebResponse httpResponse;

    // Phase 1: build and send the request.
    try
    {
        HttpWebRequest httpRequest = (HttpWebRequest)WebRequest.Create(url);
        httpRequest.Headers.Set("Pragma", "no-cache");
        httpRequest.CookieContainer = new CookieContainer();
        httpRequest.Referer = url;
        httpRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
        httpRequest.Credentials = System.Net.CredentialCache.DefaultCredentials;
        httpResponse = (HttpWebResponse)httpRequest.GetResponse();
    }
    catch (Exception ex)
    {
        throw new ApplicationException("HTTP 403 Access denied, URL: " + url, ex);
    }

    // Phase 2: a response was obtained; read the start of the body.
    try
    {
        // The using blocks release the connection and stream even if reading throws.
        using (httpResponse)
        {
            // Decode as UTF-8 when the Content-Type header mentions "utf";
            // otherwise fall back to the system default encoding.
            string contentType = httpResponse.ContentType ?? "";
            System.Text.Encoding encoding =
                contentType.IndexOf("utf", StringComparison.OrdinalIgnoreCase) != -1
                    ? System.Text.Encoding.UTF8
                    : System.Text.Encoding.Default;

            using (StreamReader reader = new StreamReader(httpResponse.GetResponseStream(), encoding))
            {
                char[] buff = new char[4000];

                // ReadBlock reports how many characters were actually read; use
                // that count so short responses are not padded with '\0'.
                int charsRead = reader.ReadBlock(buff, 0, buff.Length);
                return new string(buff, 0, charsRead);
            }
        }
    }
    catch (Exception ex)
    {
        throw new ApplicationException("HTTP 404 Page not found, URL: " + url, ex);
    }
}
二、通过 WebResponse 来获取
public static string getPage(String url)
{
WebResponse result = null;
&nbsZD2>z[i |