Skip to content

Instantly share code, notes, and snippets.

@cpouldev
Created December 27, 2023 07:36
Show Gist options
  • Select an option

  • Save cpouldev/a48fcff63ad5ab95da845cea19e580c1 to your computer and use it in GitHub Desktop.

Select an option

Save cpouldev/a48fcff63ad5ab95da845cea19e580c1 to your computer and use it in GitHub Desktop.

Revisions

  1. cpouldev created this gist Dec 27, 2023.
    58 changes: 58 additions & 0 deletions xalkiadakis.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,58 @@
    # -*- coding: utf-8 -*-

    from scrapy import Request

    from src.scraper.shops.spiders.base import SupermarketSpider


    def format_price(p):
        """Parse scraped price text into a float.

        Args:
            p: Raw text taken from a price node, or ``None`` when the
                selector matched nothing.

        Returns:
            The price as a ``float`` after stripping surrounding whitespace,
            or ``None`` when ``p`` is missing or is not a valid float literal.
        """
        try:
            return float(p.strip())
        except (AttributeError, TypeError, ValueError):
            # AttributeError: p is None (selector found no node).
            # ValueError: text is empty or not numeric.
            # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
            return None


    class XalkiadakisSpider(SupermarketSpider):
        """Spider that crawls the product listings of eshop.xalkiadakis.gr.

        The crawl starts at the shop home page, follows every category link
        found in the primary mega menu, and then walks each category's
        paginated product listing.
        """

        name = 'xalkiadakis'
        allowed_domains = ['xalkiadakis.gr']
        start_urls = ['https://eshop.xalkiadakis.gr/']

        def parse(self, response):
            """Schedule one catalog request per category link in the mega menu.

            Args:
                response: The response for one of ``start_urls``.

            Yields:
                A ``Request`` per category link, handled by ``parse_catalog``.
            """
            cats = response.css(
                '#mega-menu-primary li.mega-proiontamenu > ul > li > a.mega-menu-link'
            ).xpath('@href').getall()

            for cat in cats:
                # Resolve against the page URL: Request rejects URLs without a
                # scheme, so a relative href would otherwise raise ValueError.
                yield Request(url=response.urljoin(cat), callback=self.parse_catalog)

        def parse_catalog(self, response):
            """Extract the products of one listing page and follow pagination.

            For every ``li.product`` element the title, image URL, prices and
            product link are read. Products with no parsable price are skipped.

            Args:
                response: The response for a category listing page.

            Yields:
                The image item returned by ``self.get_image_item`` for each
                kept product, then a ``Request`` for the next listing page
                when a "next" link is present.
            """
            items = response.css('li.product')
            next_page = response.css('.page-numbers a.next').xpath('@href').get()

            for item in items:
                title = item.css('.woocommerce-loop-product__title::text').get()
                image_url = item.css('img').xpath('@src').get()
                sale_price = format_price(item.css('.sale_price bdi::text').get())
                # Text inside <del>; presumably the struck-through regular
                # price -- verify against the live markup.
                price = format_price(item.css('.price del bdi::text').get())
                url = item.css(
                    'a.woocommerce-LoopProduct-link.woocommerce-loop-product__link'
                ).xpath('@href').get()

                # NOTE(review): get_image_item is not defined in this file
                # (presumably inherited); it is called before the price check,
                # as in the original, in case it has side effects.
                image_item, image_hash = self.get_image_item(image_url)

                # Only one price found: use it as the selling price too.
                if price and not sale_price:
                    sale_price = price

                # No usable price at all: skip the product.
                if not price and not sale_price:
                    continue

                yield image_item

                self.insert_item(
                    item=title,
                    key=url,
                    sale_price=sale_price,
                    price=price,
                    url=url,
                    image=image_hash
                )

            if next_page:
                # Resolve a possibly relative pagination link before requesting it.
                yield Request(url=response.urljoin(next_page), callback=self.parse_catalog)