Created
December 27, 2023 07:36
-
-
Save cpouldev/a48fcff63ad5ab95da845cea19e580c1 to your computer and use it in GitHub Desktop.
Revisions
-
cpouldev created this gist
Dec 27, 2023. There are no files selected for viewing
# -*- coding: utf-8 -*-
"""Scrapy spider that collects product listings from eshop.xalkiadakis.gr."""
from scrapy import Request

from src.scraper.shops.spiders.base import SupermarketSpider


def format_price(p):
    """Convert scraped price text to a float.

    Args:
        p: Text taken from a price node, or ``None`` when the selector
            matched nothing.

    Returns:
        The value of ``p`` with surrounding whitespace removed, parsed as a
        float, or ``None`` when ``p`` is missing or is not a valid number.
    """
    try:
        return float(p.strip())
    except (AttributeError, TypeError, ValueError):
        # AttributeError: p is None (selector had no match) or has no .strip().
        # ValueError/TypeError: the text is not something float() accepts.
        return None


class XalkiadakisSpider(SupermarketSpider):
    """Walk the category menu of the shop and record every listed product."""

    name = 'xalkiadakis'
    allowed_domains = ['xalkiadakis.gr']
    start_urls = ['https://eshop.xalkiadakis.gr/']

    def parse(self, response):
        """Yield one catalog request per category link found in the mega menu."""
        category_links = response.css(
            '#mega-menu-primary li.mega-proiontamenu > ul > li > a.mega-menu-link'
        ).xpath('@href').getall()
        for link in category_links:
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones, which Request() would otherwise reject (missing scheme).
            yield Request(url=response.urljoin(link), callback=self.parse_catalog)

    def parse_catalog(self, response):
        """Record the products of one catalog page and follow pagination.

        For every ``li.product`` element this reads the title, image, prices
        and product link, yields the image item and hands the product to
        ``self.insert_item``. Products without any parsable price are skipped.
        ``get_image_item`` and ``insert_item`` are defined outside this file
        (presumably on ``SupermarketSpider`` -- confirm against the base class).
        """
        next_page = response.css('.page-numbers a.next').xpath('@href').get()

        for product in response.css('li.product'):
            title = product.css('.woocommerce-loop-product__title::text').get()
            image_url = product.css('img').xpath('@src').get()
            sale_price = format_price(product.css('.sale_price bdi::text').get())
            # Price found inside the <del> element of the price box
            # (presumably the pre-discount price -- verify against the site).
            price = format_price(product.css('.price del bdi::text').get())
            url = product.css(
                'a.woocommerce-LoopProduct-link.woocommerce-loop-product__link'
            ).xpath('@href').get()

            image_item, image_hash = self.get_image_item(image_url)

            # With only one price available, use it as the sale price too.
            if price and not sale_price:
                sale_price = price
            # No usable price at all: nothing to record for this product.
            if not price and not sale_price:
                continue

            yield image_item
            self.insert_item(
                item=title,
                key=url,
                sale_price=sale_price,
                price=price,
                url=url,
                image=image_hash
            )

        if next_page:
            # Resolve against the current page so relative "next" links work.
            yield Request(url=response.urljoin(next_page), callback=self.parse_catalog)