技术开发 频道

读取目标网页的HTML:用System.Net.HttpWebRequest简单封装的一个class


【IT168技术文档】

using System;
using System.IO;
using System.Net;
using System.Text;
using System.Web;

namespace MyProject.Controllers
{
    /// <summary>
    /// Small wrapper around <see cref="HttpWebRequest"/> that downloads the HTML of a page.
    /// Constructing an instance immediately issues a GET request for the given URL and stores
    /// the response body in <see cref="Content"/>.
    /// </summary>
    public class WebRequestUtility
    {
        // Sentinel returned in place of the page body when the download fails.
        // Kept as a plain string because existing callers compare against it.
        private const string FailureMarker = "failUrl";

        // Encoding used when the caller does not supply one (the original hard-coded value).
        private const string DefaultEncodingName = "gb2312";

        /// <summary>The URL passed to the constructor.</summary>
        public string RequestUrl { get; set; }

        /// <summary>The downloaded response body, or "failUrl" when the download failed.</summary>
        public string Content { get; set; }

        /// <summary>True when the constructor managed to download <see cref="RequestUrl"/>.</summary>
        public bool ifUrlAvailable { get; set; }

        /// <summary>
        /// Downloads <paramref name="url"/> and decodes the body as gb2312.
        /// </summary>
        /// <param name="url">Absolute HTTP(S) URL of the page to fetch.</param>
        public WebRequestUtility(string url)
            : this(url, null)
        {
        }

        /// <summary>
        /// Downloads <paramref name="url"/> and decodes the body with <paramref name="encoding"/>.
        /// </summary>
        /// <param name="url">Absolute HTTP(S) URL of the page to fetch.</param>
        /// <param name="encoding">Text encoding of the page; null falls back to gb2312.</param>
        public WebRequestUtility(string url, Encoding encoding)
        {
            RequestUrl = url;

            // Availability is taken from the outcome of the download itself rather than from
            // comparing the body with the sentinel, so a page whose text happens to be
            // "failUrl" is no longer reported as unavailable.
            string html;
            ifUrlAvailable = tryReadContent(url, encoding, out html);
            Content = html;
        }

        /// <summary>
        /// Builds a GET request for the URL. Despite the name, nothing is sent and no response
        /// is read here; the name is kept for backward compatibility.
        /// </summary>
        /// <param name="url">Absolute HTTP(S) URL.</param>
        /// <returns>An unsent <see cref="HttpWebRequest"/> with Method set to "GET".</returns>
        /// <exception cref="UriFormatException">The URL is not a valid URI.</exception>
        /// <exception cref="InvalidCastException">The URL scheme does not map to an HTTP request.</exception>
        public HttpWebRequest getHttpResponse(string url)
        {
            HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
            req.Method = "GET";
            return req;
        }

        /// <summary>
        /// Reads the HTML content returned for the URL, decoded as gb2312.
        /// </summary>
        /// <param name="url">Absolute HTTP(S) URL of the page to fetch.</param>
        /// <returns>The response body, or "failUrl" if the page could not be downloaded.</returns>
        public string getHttpContent(string url)
        {
            return getHttpContent(url, null);
        }

        /// <summary>
        /// Reads the HTML content returned for the URL, decoded with the given encoding.
        /// </summary>
        /// <param name="url">Absolute HTTP(S) URL of the page to fetch.</param>
        /// <param name="encoding">Text encoding of the page; null falls back to gb2312.</param>
        /// <returns>The response body, or "failUrl" if the page could not be downloaded.</returns>
        public string getHttpContent(string url, Encoding encoding)
        {
            string html;
            tryReadContent(url, encoding, out html);
            return html;
        }

        /// <summary>
        /// Issues a GET request and reports whether a successful response came back.
        /// </summary>
        /// <param name="url">Absolute HTTP(S) URL to probe.</param>
        /// <returns>
        /// True when the server answered without an error; false for a malformed URL, an HTTP
        /// error status, or any transport failure (DNS, connection refused, timeout, ...).
        /// </returns>
        public bool checkUrlAvailability(string url)
        {
            try
            {
                HttpWebRequest req = getHttpResponse(url);

                // The response must be closed, otherwise the underlying connection stays
                // checked out of the connection pool and later requests can stall.
                using (WebResponse response = req.GetResponse())
                {
                    return true;
                }
            }
            catch (WebException e)
            {
                // Previously only ProtocolError counted as "unavailable"; name-resolution and
                // connection failures fell through and were reported as available.
                logException(e.ToString());
                return false;
            }
            catch (UriFormatException e)
            {
                logException(e.ToString());
                return false;
            }
        }

        /// <summary>
        /// Hook for reporting error information. Currently a no-op.
        /// </summary>
        /// <param name="e">Text describing the error.</param>
        protected void logException(string e)
        {
            // Intentionally empty: the original implementation (writing the message to the
            // current HTTP response) was disabled by its author.
        }

        /// <summary>
        /// Downloads the page and decodes it, releasing the response, stream and reader.
        /// </summary>
        /// <param name="url">Absolute HTTP(S) URL of the page to fetch.</param>
        /// <param name="encoding">Text encoding of the page; null falls back to gb2312.</param>
        /// <param name="content">Receives the body on success, or "failUrl" on failure.</param>
        /// <returns>True when the body was read completely; otherwise false.</returns>
        private bool tryReadContent(string url, Encoding encoding, out string content)
        {
            try
            {
                HttpWebRequest req = getHttpResponse(url);
                using (WebResponse response = req.GetResponse())
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(body, encoding ?? Encoding.GetEncoding(DefaultEncodingName)))
                {
                    content = reader.ReadToEnd();
                    return true;
                }
            }
            catch (Exception e)
            {
                // Deliberately broad: this class is a best-effort fetcher and callers rely on
                // getting the sentinel rather than an exception. The failure is still handed
                // to the logging hook instead of being dropped silently.
                logException(e.ToString());
                content = FailureMarker;
                return false;
            }
        }
    }
}
  实例化这个类时,将URL地址传入构造函数,构造函数会立即发出HTTP请求并读取响应;实例的Content属性就是响应返回的HTML代码。
  本人在公司里实习之初做的一些事情就是网页信息抓取,这个class用的比较多,自己用用还是能正常运行没有碰到什么问题。
0
相关文章