Skip to content

Instantly share code, notes, and snippets.

@Nazarii
Last active August 29, 2015 14:11
Show Gist options
  • Save Nazarii/e2cc4bcd15c5b3c8cdbc to your computer and use it in GitHub Desktop.
Save Nazarii/e2cc4bcd15c5b3c8cdbc to your computer and use it in GitHub Desktop.
Podcasts downloader
#!/usr/bin/env python
import urllib
import urllib2
import re
import os
__author__ = 'Nazarii Gudzovatyi <[email protected]>'
# Human-readable summary of the script. NOTE(review): the name has no
# trailing double underscore (unlike __author__); it is kept as-is so that
# anything referring to it keeps working.
__description = """Simple script for downloading all podcasts
from The art of programming (http://taop.rpod.ru/) site.
It will create new directory called 'The_art_of_programming'
in the directory where this script was run from and download all mp3 files there."""

# Root URL of the podcast site; listing pages are requested relative to it.
BASE_URL = 'http://taop.rpod.ru/'

# Directory that receives the downloaded mp3 files. The path is anchored to
# the directory containing this script and made absolute, so the result no
# longer depends on how the script was invoked (a relative __file__ combined
# with a double dirname() used to yield the parent directory or the cwd).
DOWNLOAD_DIR = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'The_art_of_programming',
)

# Number of listing pages to scan (pages 1..PAGES inclusive).
PAGES = 5
def _local_file_name(download_link):
    """Return the local file name to use for *download_link*.

    The last path segment of the link is taken, its first four characters
    are dropped and everything from the first '%D1' onwards is cut off.
    NOTE(review): presumably the four characters are a site-specific prefix
    and '%D1' starts a percent-encoded suffix -- confirm against real links.
    """
    last_segment = download_link.split('/')[-1]
    marker_pos = last_segment.find('%D1')
    if marker_pos == -1:
        # str.find() returns -1 when the marker is missing; slicing with -1
        # would silently drop the last character, so keep the whole tail.
        trimmed = last_segment[4:]
    else:
        trimmed = last_segment[4:marker_pos]
    # An empty name would make the download target the directory itself,
    # so fall back to the untrimmed segment in that case.
    return trimmed or last_segment


def download_podcasts():
    """Download every podcast mp3 linked from the site's listing pages.

    Creates DOWNLOAD_DIR if it does not exist, scans listing pages
    1..PAGES of BASE_URL for quoted ``http://rpod.ru/get/...mp3`` links,
    downloads each distinct link once into DOWNLOAD_DIR and prints progress
    plus a final count. Returns None; network and file-system errors raised
    by urllib2/urllib/os propagate to the caller.
    """
    if not os.path.isdir(DOWNLOAD_DIR):
        os.makedirs(DOWNLOAD_DIR)

    # BASE_URL may already end with '/'; strip it so the page URL does not
    # contain a double slash.
    base_url = BASE_URL.rstrip('/')

    download_links = []
    seen_links = set()
    for page_number in range(1, PAGES + 1):
        page_url = '%s/?page=%d' % (base_url, page_number)
        website = urllib2.urlopen(page_url)
        try:
            html = website.read()
        finally:
            # Always release the connection, even if reading fails.
            website.close()
        # The same podcast can be linked several times; keep the first
        # occurrence only so the download order follows the pages.
        for link in re.findall(r'"(http://rpod.ru/get/.*?mp3)"', html):
            if link not in seen_links:
                seen_links.add(link)
                download_links.append(link)

    for download_link in download_links:
        file_name = os.path.join(DOWNLOAD_DIR, _local_file_name(download_link))
        print('Downloading %s' % file_name)
        urllib.urlretrieve(download_link, file_name)
    print("Downloaded %d podcasts" % len(download_links))
# Script entry point: start downloading only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    download_podcasts()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment