本帖最后由 jefferic 于 2014-9-26 20:43 编辑
使用 HTML Agility Pack 搭配 ScrapySharp 会更便捷
写法类似 jQuery 的选择器：
// Example: download a page with ScrapySharp's ScrapingBrowser, parse it with
// HTML Agility Pack, then query the DOM with CSS selectors via CssSelect.
// NOTE(review): CssSelect / CssSelectAncestors are presumably the extension methods
// from ScrapySharp.Extensions, and ScrapingBrowser from ScrapySharp.Network —
// confirm the matching using directives exist in the enclosing file.
Uri uri = new Uri("http://www.baidu.com");
ScrapingBrowser browser1 = new ScrapingBrowser();
string htmlStr = browser1.DownloadString(uri);

// Load the downloaded markup into an HtmlDocument and take its root node
// as the starting point for all selector queries below.
HtmlAgilityPack.HtmlDocument htmlDocument = new HtmlAgilityPack.HtmlDocument();
htmlDocument.LoadHtml(htmlStr);
HtmlNode html = htmlDocument.DocumentNode;

IEnumerable<HtmlNode> nodes = html.CssSelect("div"); // all div elements

// Alternative selectors — uncomment one to try it:
//nodes = html.CssSelect("div.content"); // div elements with CSS class 'content'
//nodes = html.CssSelect("div.widget.monthlist"); // div elements carrying both CSS classes 'widget' and 'monthlist'
//nodes = html.CssSelect("#postPaging"); // any element with the id 'postPaging'
//nodes = html.CssSelect("div#postPaging.testClass"); // div elements with the id 'postPaging' and CSS class 'testClass'
//nodes = html.CssSelect("div.content > p.para"); // p.para elements that are direct children of div.content
//nodes = html.CssSelect("input[type=text].login"); // input elements with type=text and CSS class 'login'
//nodes = html.CssSelect("p.para").CssSelectAncestors("div.content > div.widget"); // ancestors of the p.para elements that match 'div.content > div.widget'
|