Skip to content

Instantly share code, notes, and snippets.

@Nazarii
Last active August 29, 2015 14:12
Show Gist options
  • Select an option

  • Save Nazarii/94a2033531819d9969d4 to your computer and use it in GitHub Desktop.

Select an option

Save Nazarii/94a2033531819d9969d4 to your computer and use it in GitHub Desktop.
Scrape links & import to WordPress
# -*- coding: cp1251 -*-
import httplib2
import MySQLdb as mdb
from BeautifulSoup import BeautifulSoup, SoupStrainer
# Page offset increment: used as the xrange() step in import_links() and
# substituted into the topic URL for each request.
STEP = 25
# Exclusive upper bound of the page offsets iterated by import_links().
MAX_COUNTER = 3925
def import_links():
    """Scrape image links from a yaplakal.com forum topic into WordPress.

    Requests the topic pages at offsets ``0, STEP, 2 * STEP, ...`` below
    ``MAX_COUNTER``.  For every ``<img>`` tag that has an ``alt`` attribute
    and whose ``src`` contains ``pics_original``, the image is wrapped in a
    WordPress ``[caption]`` shortcode and written to the ``wp_posts`` table
    with an ``UPDATE`` statement, committing after each one.  Finally the
    number of processed links is printed.

    The database connection is closed even if a request or a query raises;
    the exception itself is propagated to the caller.
    """
    # NOTE(review): in the original source the WHERE clause stops at "id > "
    # with no right-hand side, so the server will presumably reject the
    # statement as written -- the intended post id must be confirmed and
    # supplied.  The clause is kept verbatim rather than guessed at.
    # The post content is passed as a bound parameter (%s) so the driver
    # quotes it, instead of being interpolated into the SQL text.
    query = '''UPDATE wp_posts SET post_title="Best comments from social networks", post_content=%s, post_status='publish', post_type='post'where id > '''

    http = httplib2.Http()
    connect = mdb.connect(host="hostname", user='username', passwd='passwd', db='dbname')
    counter = 0
    try:
        cursor = connect.cursor()
        for page in xrange(0, MAX_COUNTER, STEP):
            # httplib2 returns a (response headers, body) pair; only the
            # body is needed here.
            _headers, body = http.request('http://www.yaplakal.com/forum2/st/%s/topic720705.html' % page)
            # SoupStrainer limits parsing to <img> tags only.
            for link in BeautifulSoup(body, parseOnlyThese=SoupStrainer('img')):
                if not link.has_key('alt'):
                    continue
                # .get() avoids a KeyError on an <img> without a src.
                src = link.get('src')
                if not src or 'pics_original' not in src:
                    continue
                # %r renders the URL as a quoted string literal, which
                # supplies the quotes around the src attribute value.
                # str() presumably keeps a u'' prefix out of that literal
                # under Python 2 -- it will raise on non-ASCII URLs.
                content = """[caption id="" align="alignnone" width="657"]<img alt="Best comments from social networks" src=%r width="657" height="368" /> http://yaplakal.ru/[/caption]""" % str(src)
                cursor.execute(query, (content,))
                connect.commit()
                counter += 1
    finally:
        # Release the connection on both the success and the error path.
        connect.close()
    print("Totally imported %s links" % counter)
if __name__ == '__main__':
    # Run the import only when this file is executed as a script, not when
    # it is imported as a module.
    import_links()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment