import scrapy
from scrapy.http import TextResponse
from scrapy_splash import LuaRequest


class BooksSpider(scrapy.Spider):
    """Click through 50 "next" pages in Splash, then parse the final page.

    The Lua script returns only the HTML rendered after the last click, so
    ``parse`` yields items from the last page alone.
    """

    name = 'books'
    allowed_domains = ['books.toscrape.com']

    def start_requests(self):
        # NOTE(review): scrapy_splash's public API is SplashRequest with
        # endpoint='execute' and args={'lua_source': ...}; confirm LuaRequest
        # is a project-local wrapper before relying on it.
        # NOTE(review): Splash's Lua API is splash:select(sel):mouse_click(),
        # not splash:css(sel):click() — verify against the Splash version used.
        yield LuaRequest(
            'http://books.toscrape.com/',
            lua_source="""
                splash:go(args.url)
                for i=1,50 do
                    splash:css(".next a"):click()
                    splash:wait(1.0)
                end
                return splash:html()
            """,
        )

    def parse(self, response):
        """Yield title/url dicts for every book on the rendered page."""
        for book in response.css('article.product_pod'):
            yield {
                'title': book.css('h3 a::text').get(),
                'url': book.css('h3 a::attr(href)').get(),
            }


class BooksSpider2(scrapy.Spider):
    """Like BooksSpider, but collect the HTML of every page visited.

    The Lua script appends each rendered page's HTML to a table and returns
    the whole table; ``parse`` then re-wraps each snapshot in a TextResponse
    so the usual CSS selectors work per page.
    """

    # BUG FIX: spider names must be unique within a project; the original
    # duplicated 'books' across all three classes.
    name = 'books2'
    allowed_domains = ['books.toscrape.com']

    def start_requests(self):
        # BUG FIX: `res={}` was inside the loop in the original, resetting
        # the accumulator on every iteration so only the last page survived.
        # It must be initialised once, before the loop.
        yield LuaRequest(
            'http://books.toscrape.com/',
            lua_source="""
                splash:go(args.url)
                res={}
                for i=1,50 do
                    splash:css(".next a"):click()
                    splash:wait(1.0)
                    table.insert(res, splash:html())
                end
                return res
            """,
        )

    def parse(self, response):
        """Re-parse each captured HTML snapshot as its own response."""
        # NOTE(review): snapshots are taken *after* each click, so the first
        # (landing) page is never captured — confirm that is intentional.
        for html in response.data:
            page = TextResponse(response.url, body=html, encoding='utf8')
            yield from self.parse_page(page)

    def parse_page(self, response):
        """Yield title/url dicts for every book on one rendered page."""
        for book in response.css('article.product_pod'):
            yield {
                'title': book.css('h3 a::text').get(),
                'url': book.css('h3 a::attr(href)').get(),
            }


class BooksSpider3(scrapy.Spider):
    """Variant that streams each page back as it is rendered.

    Relies on a hypothetical ``splash:send()`` pushing partial results to the
    client while the script is still running.
    """

    # BUG FIX: unique spider name (original duplicated 'books').
    name = 'books3'
    allowed_domains = ['books.toscrape.com']

    def start_requests(self):
        # NOTE(review): SuperLuaRequest is not defined or imported anywhere in
        # this file, and splash:send() is not part of the Splash Lua API —
        # this spider cannot run as-is; confirm where these are meant to
        # come from or remove the class.
        yield SuperLuaRequest(
            'http://books.toscrape.com/',
            lua_source="""
                splash:go(args.url)
                for i=1,50 do
                    splash:css(".next a"):click()
                    splash:wait(1.0)
                    splash:send(splash:html())
                end
            """,
        )

    def parse(self, response):
        """Yield title/url dicts for every book on the rendered page."""
        for book in response.css('article.product_pod'):
            yield {
                'title': book.css('h3 a::text').get(),
                'url': book.css('h3 a::attr(href)').get(),
            }