Created
June 26, 2014 02:10
-
-
Save aeesky/2aa67d4b5daee542af1b to your computer and use it in GitHub Desktop.
HtmlAgility
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// <summary>
/// Maps a charset name (as reported by an HTTP response) to a .NET <see cref="Encoding"/>.
/// </summary>
/// <param name="CharacterSet">
/// Charset name such as "utf-8" or "gb2312". Matching ignores case, surrounding
/// whitespace and surrounding quotes. May be null or empty.
/// </param>
/// <returns>
/// The GB2312 encoding for "gb2312", <see cref="Encoding.UTF8"/> for "utf-8", and
/// <see cref="Encoding.Default"/> for anything else, including null or empty input.
/// </returns>
public Encoding GetEncoding(string CharacterSet)
{
    if (string.IsNullOrEmpty(CharacterSet))
    {
        return Encoding.Default;
    }

    // Charset names are case-insensitive and may arrive quoted (charset="UTF-8"),
    // so normalize before matching; otherwise "UTF-8" would fall through to the
    // default encoding and the page would be decoded incorrectly.
    string normalized = CharacterSet.Trim(' ', '\t', '"', '\'').ToLowerInvariant();
    switch (normalized)
    {
        case "gb2312":
            return Encoding.GetEncoding("gb2312");
        case "utf-8":
            return Encoding.UTF8;
        default:
            return Encoding.Default;
    }
}
/// <summary>
/// Performs an HTTP GET on <paramref name="url"/> and returns the response body as text.
/// </summary>
/// <param name="url">Absolute http/https URL to download.</param>
/// <returns>
/// The response body, decoded with the encoding that GetEncoding selects for the
/// response's character set.
/// </returns>
/// <exception cref="InvalidCastException">The URL does not use an HTTP scheme.</exception>
/// <exception cref="WebException">The request failed or the server returned an error status.</exception>
public string HttpGet(string url)
{
    // Direct casts instead of "as": a non-HTTP URL fails here with a clear
    // InvalidCastException rather than a NullReferenceException further down.
    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
    req.Accept = "*/*";
    req.Method = "GET";
    req.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1";

    // Advertise gzip/deflate via Accept-Encoding and let the framework decompress
    // the body. This replaces hand-rolled GZipStream/DeflateStream wrapping that
    // depended on a culture-sensitive ToLower() check of Content-Encoding.
    req.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

    // Stacked usings guarantee the response, stream and reader are all released,
    // even when reading or encoding lookup throws.
    using (HttpWebResponse response = (HttpWebResponse)req.GetResponse())
    using (Stream stream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream, GetEncoding(response.CharacterSet)))
    {
        return reader.ReadToEnd();
    }
}
// Download the cnblogs home page and print the title and link of every listed post.
string html = HttpGet("http://www.cnblogs.com/");
HtmlDocument doc = new HtmlDocument();
doc.LoadHtml(html);
// Get the article list. SelectNodes yields null (not an empty collection) when
// nothing matches, so guard before iterating.
var artlist = doc.DocumentNode.SelectNodes("//div[@class='post_item']");
if (artlist != null)
{
    foreach (var item in artlist)
    {
        // The relative XPath (".//") searches only inside this post, so the item's
        // markup does not need to be re-parsed into a second HtmlDocument.
        var html_a = item.SelectSingleNode(".//a[@class='titlelnk']");
        if (html_a == null || html_a.Attributes["href"] == null)
        {
            // Post without a title link (or a link without href): nothing to print.
            continue;
        }

        Response.Write(string.Format("标题为:{0},链接为:{1}<br>", html_a.InnerText, html_a.Attributes["href"].Value));
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment