Last active
October 9, 2017 15:36
-
-
Save Tafkas/3ca874700d589107b9c04619fd73f9eb to your computer and use it in GitHub Desktop.
Revisions
-
Tafkas revised this gist
Oct 9, 2017 . 1 changed file with 4 additions and 7 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -45,9 +45,8 @@ def fetch_amazon_rating(isbn13): s = requests.Session() response = s.get('https://www.amazon.com') cookies = dict(response.cookies) url = '''https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords={}'''.format(isbn13) response = requests.get(url, headers=HEADER, cookies=cookies) if response.status_code == 503: response.raise_for_status() except requests.exceptions.HTTPError as e: @@ -63,9 +62,7 @@ def fetch_amazon_rating(isbn13): if len(rating_matches) > 0: amazon_rating = float(rating_matches[0]) # get number of ratings # number_of_ratings = tree.xpath('''/html/body/div[1]/div[3]/div/div[3]/div[2]/div/div[4]/div[1]/div/ul/li/div/div[3]/div[4]/a''')[0].text # get asin (is either isbn10 or amazon specific) tmp = tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "a-link-normal a-text-normal", " " ))]')[0] @@ -103,7 +100,7 @@ def fetch_ebook(): asin, amazon_rating, number_of_ratings = fetch_amazon_rating(isbn13) if amazon_rating is not None: stars = ':star:' * int(round(amazon_rating, 0)) amazon_text = ("{title} has been rated \n {rating} out of 5 stars {stars} on " "<https://www.amazon.de/dp/{asin}?tag=de125725875-21|Amazon.com>." .format(title=fix_string(title), rating=amazon_rating, -
Tafkas revised this gist
Oct 8, 2017 . 1 changed file with 17 additions and 11 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -58,12 +58,14 @@ def fetch_amazon_rating(isbn13): # get rating rating_regex = re.compile('(\d+(\.\d+)?)') rating_matches = [m.group(0) for foo in tmp for m in [rating_regex.search(foo.text)] if m] amazon_rating, number_of_ratings = None, None if len(rating_matches) > 0: amazon_rating = float(rating_matches[0]) # get number of ratings number_of_ratings = tree.xpath('''//*[contains(concat( " ", @class, " " ), concat( " ", "a-span-last", " " ))] //*[contains(concat( " ", @class, " " ), concat( " ", "a-size-small", " " )) and contains(concat( " ", @class, " " ), concat( " ", "a-text-normal", " " ))]''')[0].text # get asin (is either isbn10 or amazon specific) tmp = tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "a-link-normal a-text-normal", " " ))]')[0] @@ -109,12 +111,16 @@ def fetch_ebook(): number_of_ratings=number_of_ratings, asin=asin)) else: amazon_text = ("{title} has not been rated on " "<https://www.amazon.de/dp/{asin}?tag=de125725875-21|Amazon.com> yet." .format(title=fix_string(title), asin=asin)) amazon_attachment = { "title": "Amazon Rating", "text": amazon_text } for slack_name in OUTPUT: payload = {"channel": OUTPUT[slack_name]['channel'], -
Tafkas revised this gist
Aug 18, 2017 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -91,7 +91,7 @@ def fetch_ebook(): cover_image = tree.xpath('''//*[contains(concat( " ", @class, " " ), concat( " ", "imagecache-dotd_main_image", " " ))]''') image_url = 'https:/{}'.format(fix_string(cover_image[0].attrib['src'][1:])).replace(' ', '%20') detail_page = ( tree.xpath('''//*[contains(concat( " ", @class, " " ), concat( " ", "dotd-main-book-image", " " ))]//a''') -
Tafkas revised this gist
Aug 3, 2017 . 1 changed file with 15 additions and 5 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -41,9 +41,19 @@ def fetch_amazon_rating(isbn13): :param isbn13: An ISBN13 number :return: asin number and the Amazon rating and number of ratings """ try: s = requests.Session() response = s.get('https://www.amazon.com') cookies = dict(response.cookies) response = requests.get('''https://www.amazon.com/s/ref=nb_sb_noss? url=search-alias%3Daps&field-keywords={}'''.format(isbn13), headers=HEADER, cookies=cookies) if response.status_code == 503: response.raise_for_status() except requests.exceptions.HTTPError as e: print "oops something unexpected happened: {}".format(e) tree = html.fromstring(response.text) tmp = tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "a-icon-alt", " " ))]') # get rating @@ -52,7 +62,7 @@ def fetch_amazon_rating(isbn13): # get number of ratings number_of_ratings = tree.xpath('''//*[contains(concat( " ", @class, " " ), concat( " ", "a-span-last", " " ))] //*[contains(concat( " ", @class, " " ), concat( " ", "a-size-small", " " )) and contains(concat( " ", @class, " " ), concat( " ", "a-text-normal", " " ))]''')[0].text # get asin (is either isbn10 or amazon specific) @@ -78,7 +88,7 @@ def fetch_ebook(): title = tree.xpath('//*[(@id = "deal-of-the-day")]//h2')[0].text.strip() description = tree.xpath('''//*[(@id = "deal-of-the-day")] //div[(((count(preceding-sibling::*) + 1) = 4) and parent::*)]''')[0].text.strip() cover_image = tree.xpath('''//*[contains(concat( " ", @class, " " ), concat( " ", "imagecache-dotd_main_image", " " ))]''') image_url = 'https:/{}'.format(cover_image[0].attrib['src'][1:]).replace(' ', '%20') -
Tafkas revised this gist
Jul 30, 2017 . No changes.There are no files selected for viewing
-
Tafkas revised this gist
Jul 27, 2017 . 1 changed file with 0 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,5 @@ import json import re import requests from lxml import html @@ -84,7 +83,6 @@ def fetch_ebook(): image_url = 'https:/{}'.format(cover_image[0].attrib['src'][1:]).replace(' ', '%20') detail_page = ( tree.xpath('''//*[contains(concat( " ", @class, " " ), concat( " ", "dotd-main-book-image", " " ))]//a''') [0].attrib['href']) -
Tafkas revised this gist
Jul 27, 2017 . 1 changed file with 21 additions and 11 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,6 @@ import json import re import urllib import requests from lxml import html @@ -37,28 +38,36 @@ def fetch_isbn13(detail_page_url): def fetch_amazon_rating(isbn13): """Fetches the Amazon Rating, number of ratings and ASIN from Amazon :param isbn13: An ISBN13 number :return: asin number and the Amazon rating and number of ratings """ page = requests.get('''https://www.amazon.com/s/ref=nb_sb_noss? url=search-alias%3Daps&field-keywords={}'''.format(isbn13), headers=HEADER) tree = html.fromstring(page.text) tmp = tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "a-icon-alt", " " ))]') # get rating rating_regex = re.compile('(\d+(\.\d+)?)') amazon_rating = float([m.group(0) for foo in tmp for m in [rating_regex.search(foo.text)] if m][0]) # get number of ratings number_of_ratings = tree.xpath('''//*[contains(concat( " ", @class, " " ), concat( " ", "a-span-last", " " ))] //*[contains(concat( " ", @class, " " ), concat( " ", "a-size-small", " " )) and contains(concat( " ", @class, " " ), concat( " ", "a-text-normal", " " ))]''')[0].text # get asin (is either isbn10 or amazon specific) tmp = tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "a-link-normal a-text-normal", " " ))]')[0] m = re.search('\d{10}', tmp.attrib['href']) asin = None if m: asin = m.group(0) else: m = re.search('(B[0-9]{2}[0-9A-Z]{7}|[0-9]{9}(X|0-9]))', tmp.attrib['href']) if m: asin = m.group(0) return asin, amazon_rating, number_of_ratings def fetch_ebook(): @@ -73,23 +82,24 @@ def fetch_ebook(): cover_image = tree.xpath('''//*[contains(concat( " ", @class, " " ), concat( " ", "imagecache-dotd_main_image", " " ))]''') image_url = 'https:/{}'.format(cover_image[0].attrib['src'][1:]).replace(' ', '%20') detail_page = ( tree.xpath('''//*[contains(concat( " ", @class, " " ), concat( " ", "dotd-main-book-image", " " ))]//a''') [0].attrib['href']) isbn13 = fetch_isbn13(BASEURL + detail_page) asin, amazon_rating, number_of_ratings = fetch_amazon_rating(isbn13) if amazon_rating is not None: stars = ':star:' * int(round(amazon_rating, 0)) amazon_text = ("{title} has been rated \n {rating} out of 5 stars {stars} by {number_of_ratings} people on " "<https://www.amazon.de/dp/{asin}?tag=de125725875-21|Amazon.com>." .format(title=fix_string(title), rating=amazon_rating, stars=stars, number_of_ratings=number_of_ratings, asin=asin)) amazon_attachment = { "title": "Amazon Rating", -
Tafkas revised this gist
Jul 27, 2017 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -28,7 +28,7 @@ def fix_string(foo): def fetch_isbn13(detail_page_url): """Fetches the ISBN13 number from the Packt Book detail page :param detail_page_url: the url of the packt book detal page :return: the ISBN13 of the book """ page = requests.get(detail_page_url, headers=HEADER) tree = html.fromstring(page.text) @@ -82,7 +82,7 @@ def fetch_ebook(): isbn10, amazon_rating, number_of_ratings = fetch_amazon_rating(isbn13) if amazon_rating is not None: stars = ':star:' * int(round(amazon_rating, 0)) amazon_text = ("{title} has been rated \n {rating} out of 5 stars {stars} by {number_of_ratings} people on " "<https://www.amazon.de/dp/{isbn10}?tag=de125725875-21|Amazon>." .format(title=fix_string(title), -
Tafkas revised this gist
Jul 27, 2017 . 1 changed file with 4 additions and 4 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -70,14 +70,14 @@ def fetch_ebook(): title = tree.xpath('//*[(@id = "deal-of-the-day")]//h2')[0].text.strip() description = tree.xpath('''//*[(@id = "deal-of-the-day")] //div[(((count(preceding-sibling::*) + 1) = 4) and parent::*)]''')[0].text.strip() cover_image = tree.xpath('''//*[contains(concat( " ", @class, " " ), concat( " ", "imagecache-dotd_main_image", " " ))]''') image_url = 'http:/{}'.format(cover_image[0].attrib['src'][1:]) detail_page = ( tree.xpath('''//*[contains(concat( " ", @class, " " ), concat( " ", "dotd-main-book-image", " " ))]//a''') [0].attrib['href']) isbn13 = fetch_isbn13(BASEURL + detail_page) isbn10, amazon_rating, number_of_ratings = fetch_amazon_rating(isbn13) -
Tafkas revised this gist
Jul 27, 2017 . 1 changed file with 20 additions and 15 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -37,25 +37,28 @@ def fetch_isbn13(detail_page_url): def fetch_amazon_rating(isbn13): """Fetches the Amazon Rating, number of ratings and ISBN10 from Amazon :param isbn13: An ISBN13 number :return: and ISBN10 number and the Amazon rating """ page = requests.get('''https://www.amazon.com/s/ref=nb_sb_noss? url=search-alias%3Daps&field-keywords={}'''.format(isbn13), headers=HEADER) tree = html.fromstring(page.text) tmp = tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "a-icon-alt", " " ))]') # get rating rating_regex = re.compile('\d\.\d') amazon_rating = float([m.group(0) for foo in tmp for m in [rating_regex.search(foo.text)] if m][0]) # get number of ratings number_of_ratings = tree.xpath('''//*[contains(concat( " ", @class, " " ), concat( " ", "a-span-last", " " ))] //*[contains(concat( " ", @class, " " ), concat( " ", "a-size-small", " " )) and contains(concat( " ", @class, " " ), concat( " ", "a-text-normal", " " ))]''')[0].text # get isbn 10 tmp = tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "a-link-normal a-text-normal", " " ))]')[0] m = re.search('\d{10}', tmp.attrib['href']) isbn10 = None if m: isbn10 = m.group(0) return isbn10, amazon_rating, number_of_ratings def fetch_ebook(): @@ -65,9 +68,8 @@ def fetch_ebook(): page = requests.get(free_book_url, headers=HEADER) tree = html.fromstring(page.text) title = tree.xpath('//*[(@id = "deal-of-the-day")]//h2')[0].text.strip() description = tree.xpath('''//*[(@id = "deal-of-the-day")] //div[(((count(preceding-sibling::*) + 1) = 4) and parent::*)]''')[0].text.strip() cover_image = tree.xpath( '//*[contains(concat( " ", @class, " " ), concat( " ", "imagecache-dotd_main_image", " " ))]') @@ -78,14 +80,16 @@ def fetch_ebook(): 0].attrib['href']) isbn13 = fetch_isbn13(BASEURL + detail_page) isbn10, amazon_rating, number_of_ratings = fetch_amazon_rating(isbn13) if amazon_rating is not None: stars = ':star:' * int(amazon_rating) amazon_text = ("{title} has been rated \n {rating} out of 5 stars {stars} by {number_of_ratings} people on " "<https://www.amazon.de/dp/{isbn10}?tag=de125725875-21|Amazon>." .format(title=fix_string(title), rating=amazon_rating, stars=stars, number_of_ratings=number_of_ratings, isbn10=isbn10)) amazon_attachment = { "title": "Amazon Rating", @@ -106,7 +110,8 @@ def fetch_ebook(): amazon_attachment] } r = requests.post(OUTPUT[slack_name]['webhook'], data=json.dumps(payload), headers={"content-type": "text/javascript"}) if r.status_code != 200: print r -
Tafkas revised this gist
Jul 26, 2017 . 1 changed file with 70 additions and 8 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,15 +1,16 @@ import json import re import requests from lxml import html BASEURL = 'https://www.packtpub.com' HEADER = {'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'} OUTPUT = {'pav': {'channel': "#ebook-monkey", 'webhook': "https://hooks.slack.com/services/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"}, # 'pav-family': {'channel': "#my-channel", # 'webhook': "https://hooks.slack.com/services/...."} } SLACK_USERNAME = "ebook monkey" @@ -24,30 +25,91 @@ def fix_string(foo): return foo def fetch_isbn13(detail_page_url): """Fetches the ISBN13 number from the Packt Book detail page :param detail_page_url: the url of the packt book detal page :return: """ page = requests.get(detail_page_url, headers=HEADER) tree = html.fromstring(page.text) isbn13 = tree.xpath('//*[contains(concat( " ", @itemprop, " " ), concat( " ", "isbn", " " ))]')[0].text return isbn13 def fetch_amazon_rating(isbn13): """Fetches the Amazon Rating and ISBN10 from Amazon :param isbn13: An ISBN13 number :return: and ISBN10 number and the Amazon rating """ page = requests.get( 'https://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords={}'.format(isbn13), headers=HEADER) tree = html.fromstring(page.text) tmp = tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "a-icon-alt", " " ))]') # get rating rating_regex = re.compile('\d\.\d') amazon_rating = float([m.group(0) for foo in tmp for m in [rating_regex.search(foo.text)] if m][0]) # get isbn 10 tmp = tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "a-link-normal a-text-normal", " " ))]')[0] m = re.search('\d{10}', tmp.attrib['href']) isbn10 = None if m: isbn10 = m.group(0) return isbn10, amazon_rating def fetch_ebook(): """Fetches the Packt free ebook of the day, enriches the output with information from Amazon and posts it to Slack """ free_book_url = BASEURL + '/packt/offers/free-learning' page = requests.get(free_book_url, headers=HEADER) tree = html.fromstring(page.text) title = tree.xpath('//*[(@id = "deal-of-the-day")]//h2')[0].text.strip() description = tree.xpath( '//*[(@id = "deal-of-the-day")]//div[(((count(preceding-sibling::*) + 1) = 4) and parent::*)]')[ 0].text.strip() cover_image = tree.xpath( '//*[contains(concat( " ", @class, " " ), concat( " ", "imagecache-dotd_main_image", " " ))]') image_url = 'http:/{}'.format(cover_image[0].attrib['src'][1:]) detail_page = ( tree.xpath('//*[contains(concat( " ", @class, " " ), concat( " ", "dotd-main-book-image", " " ))]//a')[ 0].attrib['href']) isbn13 = fetch_isbn13(BASEURL + detail_page) isbn10, amazon_rating = fetch_amazon_rating(isbn13) if amazon_rating is not None: stars = ':star:' * int(amazon_rating) amazon_text = ("{title} has been rated \n {rating} out of 5 stars {stars} on " "<https://www.amazon.de/dp/{isbn10}?tag=de125725875-21|Amazon>.".format(title=fix_string(title), rating=amazon_rating, stars=stars, isbn10=isbn10)) amazon_attachment = { "title": "Amazon Rating", "text": amazon_text } else: amazon_attachment = {} for slack_name in OUTPUT: payload = {"channel": OUTPUT[slack_name]['channel'], "username": SLACK_USERNAME, "icon_emoji": SLACK_ICON_EMOJI, "attachments": [{ "title": "Free ebook today: {title}".format(title=fix_string(title)), "title_link": free_book_url, "text": description, "image_url": image_url}, amazon_attachment] } r = requests.post(OUTPUT[slack_name]['webhook'], data=json.dumps(payload), headers={"content-type": "text/javascript"}) if r.status_code != 200: print r if __name__ == '__main__': -
Tafkas created this gist
Jun 26, 2017 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,54 @@ import json import requests from lxml import html URL = 'https://www.packtpub.com/packt/offers/free-learning' HEADER = {'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'} OUTPUT = {'pav': {'channel': "#my-channel", 'webhook': "https://hooks.slack.com/services/...."}, 'pav-family': {'channel': "#my-channel", 'webhook': "https://hooks.slack.com/services/...."} } SLACK_USERNAME = "ebook monkey" SLACK_ICON_EMOJI = ":monkey_face:" def fix_string(foo): if isinstance(foo, basestring): foo = foo.encode('utf8') else: foo = unicode(foo).encode('utf8') return foo def fetch_ebook(): page = requests.get(URL, headers=HEADER) tree = html.fromstring(page.text) title = tree.xpath('//*[(@id = "deal-of-the-day")]//h2')[0].text.strip() description = tree.xpath( '//*[(@id = "deal-of-the-day")]//div[(((count(preceding-sibling::*) + 1) = 4) and parent::*)]')[ 0].text.strip() cover_image = tree.xpath( '//*[contains(concat( " ", @class, " " ), concat( " ", "imagecache-dotd_main_image", " " ))]') image_url = 'http:/{}'.format(cover_image[0].attrib['src'][1:]) for slack_name in OUTPUT: payload = {"channel": OUTPUT[slack_name]['channel'], "username": SLACK_USERNAME, "icon_emoji": SLACK_ICON_EMOJI, "attachments": [{ "title": "Free ebook today: {title}".format(title=fix_string(title)), "title_link": URL, "text": description, "image_url": image_url}] } r = requests.post(OUTPUT[slack_name]['webhook'], data=json.dumps(payload), headers={"content-type": "text/javascript"}) if __name__ == '__main__': fetch_ebook()