Skip to content

Instantly share code, notes, and snippets.

@SudShekhar
Last active August 29, 2015 14:13
Show Gist options
  • Save SudShekhar/41253a3f19e609a9eb2e to your computer and use it in GitHub Desktop.
Save SudShekhar/41253a3f19e609a9eb2e to your computer and use it in GitHub Desktop.
Item Loader vs normal items in scrapy
import scrapy
from tutorial.items import DmozItem
from scrapy.contrib.loader import ItemLoader
class DmozSpider(scrapy.Spider):
    """Spider for the DMOZ Python "Books" and "Resources" directory pages.

    Every ``<li>`` that sits directly under a ``<ul>`` on a crawled page is
    turned into a ``DmozItem`` whose fields are filled through an
    ``ItemLoader``.
    """

    name = "dmoz"
    allowed_domains = ["dmoz.org"]
    start_urls = [
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Books/",
        "http://www.dmoz.org/Computers/Programming/Languages/Python/Resources/",
    ]

    def parse(self, response):
        """Yield one loaded ``DmozItem`` per ``//ul/li`` node of *response*.

        Some contracts are used here; the ``@``-prefixed lines below are
        Scrapy spider contracts and must stay in this docstring verbatim.

        @url http://www.dmoz.org/Computers/Programming/Languages/Python/Books/
        @returns items 1 100
        @returns requests 0 0
        @scrapes title link
        """
        # CODE 1 -- the same fields filled by assigning to a plain item,
        # kept (disabled) for comparison with the ItemLoader variant below:
        # for sel in response.xpath('//ul/li'):
        #     item = DmozItem()
        #     item['title'] = sel.xpath('a/text()').extract()
        #     item['link'] = sel.xpath('a/@href').extract()
        #     item['desc'] = sel.xpath('text()').extract()
        #     yield item

        # CODE 2 -- let an ItemLoader collect the values for each field.
        for sel in response.xpath("//ul/li"):
            # XPaths are evaluated relative to the current <li> selector.
            loader = ItemLoader(item=DmozItem(), selector=sel)
            loader.add_xpath('title', 'a/text()')
            loader.add_xpath('link', 'a/@href')
            loader.add_xpath('desc', 'text()')
            yield loader.load_item()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment