Last active
August 29, 2015 14:13
-
-
Save SudShekhar/41253a3f19e609a9eb2e to your computer and use it in GitHub Desktop.
Revisions
-
SudShekhar revised this gist
Jan 14, 2015. 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -32,4 +32,4 @@ def parse(self,response): l.add_xpath('link','a/@href') l.add_xpath('desc','text()') it= l.load_item() yield it -
SudShekhar revised this gist
Jan 14, 2015. 1 changed file with 4 additions and 0 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -16,12 +16,16 @@ def parse(self,response): @returns requests 0 0 @scrapes title link """ # CODE 1 # for sel in response.xpath('//ul/li'): # item = DmozItem() # item['title'] = sel.xpath('a/text()').extract() # item['link'] = sel.xpath('a/@href').extract() # item['desc'] = sel.xpath('text()').extract() # yield item # CODE 2 for sel in response.xpath("//ul/li"): l = ItemLoader(item=DmozItem(),selector=sel) l.add_xpath('title','a/text()') -
SudShekhar renamed this gist
Jan 14, 2015. 1 changed file with 0 additions and 0 deletions. There are no files selected for viewing
File renamed without changes. -
SudShekhar created this gist
Jan 14, 2015. There are no files selected for viewing
import scrapy
from tutorial.items import DmozItem
# NOTE(review): `scrapy.contrib.loader` is the import path this file was
# written against; newer Scrapy releases expose ItemLoader from
# `scrapy.loader` -- confirm against the installed Scrapy version.
from scrapy.contrib.loader import ItemLoader


class DmozSpider(scrapy.Spider):
    """Spider that extracts link entries from two dmoz.org Python listings."""

    name = "dmoz"
    allowed_domains = ["dmoz.org"]
    start_urls = [
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/",
    ]

    def parse(self, response):
        """Yield one populated ``DmozItem`` per ``<li>`` found under a ``<ul>``.

        Some contracts are used here (the ``@`` lines below are read by
        Scrapy's contract checker, so they must stay on their own lines).

        @url http://www.dmoz.org/Computers/Programming/Languages/Python/Books/
        @returns items 1 100
        @returns requests 0 0
        @scrapes title link
        """
        # Alternative without ItemLoader, kept for comparison:
        # for sel in response.xpath('//ul/li'):
        #     item = DmozItem()
        #     item['title'] = sel.xpath('a/text()').extract()
        #     item['link'] = sel.xpath('a/@href').extract()
        #     item['desc'] = sel.xpath('text()').extract()
        #     yield item
        for sel in response.xpath("//ul/li"):
            # Scope the loader to this <li> so the XPaths below are relative.
            loader = ItemLoader(item=DmozItem(), selector=sel)
            loader.add_xpath('title', 'a/text()')
            loader.add_xpath('link', 'a/@href')
            loader.add_xpath('desc', 'text()')
            # The original yielded the undefined name `i` (NameError);
            # yield the item the loader actually built.
            yield loader.load_item()