import scrapy
from tutorial.items import DmozItem

try:
    # Scrapy 1.0 relocated the item loader to ``scrapy.loader``.
    from scrapy.loader import ItemLoader
except ImportError:
    # Older Scrapy releases only ship the legacy ``scrapy.contrib`` path.
    from scrapy.contrib.loader import ItemLoader


class DmozSpider(scrapy.Spider):
    """Spider that scrapes two dmoz.org Python directory listing pages.

    Each ``<li>`` nested in a ``<ul>`` on the start pages is turned into a
    ``DmozItem`` by ``parse``.
    """

    name = "dmoz"
    allowed_domains = ["dmoz.org"]
    start_urls = [
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/",
    ]

    def parse(self, response):
        """Yield one ``DmozItem`` for every ``//ul/li`` node in the response.

        The lines below that start with ``@`` are Scrapy spider contracts,
        which Scrapy reads from this docstring one line at a time; keep each
        contract on its own line.

        @url http://www.dmoz.org/Computers/Programming/Languages/Python/Books/
        @returns items 1 100
        @returns requests 0 0
        @scrapes title link
        """
        # An earlier variant of this method filled the item by hand, assigning
        # ``sel.xpath(...).extract()`` to item['title'], item['link'] and
        # item['desc']. The ItemLoader below gathers the same XPath
        # expressions relative to each list entry.
        for sel in response.xpath("//ul/li"):
            loader = ItemLoader(item=DmozItem(), selector=sel)
            loader.add_xpath("title", "a/text()")
            loader.add_xpath("link", "a/@href")
            loader.add_xpath("desc", "text()")
            yield loader.load_item()