Skip to content

Instantly share code, notes, and snippets.

@jstacoder
Last active January 13, 2016 05:28
Show Gist options
  • Select an option

  • Save jstacoder/d9ef555fb7ef5498c709 to your computer and use it in GitHub Desktop.

Select an option

Save jstacoder/d9ef555fb7ef5498c709 to your computer and use it in GitHub Desktop.

Revisions

  1. jstacoder revised this gist Aug 14, 2015. 1 changed file with 9 additions and 4 deletions.
    13 changes: 9 additions & 4 deletions get_pdfs.py
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,5 @@
    # requires twill==0.9
    import sys
    from twill import get_browser
    from pprint import pprint

    @@ -50,10 +51,15 @@ def save_file(self,name):
    def make_filename(self):
        """Derive a lowercase file name from the current page title.

        The title is read from ``self.browser.get_title()`` and must consist
        of three parts separated by ``-``: ``<name> - <ignored> - <ext>``.
        Splitting is done from the right, so hyphens inside ``<name>`` are
        kept as part of the name and then turned into underscores.

        Returns:
            str: ``<name>.<ext>`` lowercased, with spaces and hyphens in the
            name replaced by ``_`` and commas removed.

        Raises:
            ValueError: if the title has fewer than three ``-`` separated parts.
        """
        title = self.browser.get_title()
        # rsplit with maxsplit=2 keeps a hyphenated book title in one piece;
        # a plain split('-') made the three-way unpack fail on such titles.
        name, _ignored, ext = [part.strip() for part in title.rsplit('-', 2)]
        safe_name = name.replace(' ', '_').replace('-', '_').replace(',', '')
        return str(safe_name + '.' + ext).lower()

    def print_usage():
        """Print the command-line usage line to stdout and terminate.

        Raises:
            SystemExit: always, raised by ``sys.exit()`` called with no argument.
        """
        # A single-argument, parenthesised print produces the same output on
        # Python 2 and Python 3, unlike the bare print statement.
        print('Usage: findpdf <TERM> <NUM> [outfile] - search for TERM and download NUM')
        sys.exit()

    def main():
    finder = PdfFinder()
    import sys
    if any(filter(lambda x: x=='-h' or x=='--help',sys.argv)):
    print_usage()
    if len(sys.argv) == 2:
    print finder.search(sys.argv[1])
    elif len(sys.argv) >= 3:
    @@ -64,7 +70,6 @@ def main():
    finder.save_file(name)
    print 'saved {} to file'.format(name)
    else:
    print 'Usage: findpdf <TERM> <NUM> [outfile] - search for TERM and download NUM'
    print_usage()
    if __name__ == "__main__":
    main()

    main()
  2. jstacoder revised this gist Aug 14, 2015. 1 changed file with 0 additions and 6 deletions.
    6 changes: 0 additions & 6 deletions get_pdfs.py
    Original file line number Diff line number Diff line change
    @@ -14,7 +14,6 @@ def __init__(self):
    self._b = self.browser._browser
    self._q = list(self._b.forms())[0]['q']


    def search(self,term):
    self.browser.get_form(1).set_value(term,'q')
    self.browser.get_form(1).set_value(['title'],'type')
    @@ -52,25 +51,20 @@ def make_filename(self):
    name,junk,ext = map(str.strip,self.browser.get_title().split('-'))
    return str(name.replace(' ','_').replace('-','_').replace(',','') + '.' + ext).lower()


    def main():
        """Command-line entry point: list search results or download one.

        Reads ``sys.argv``:

        * ``findpdf <TERM>`` prints the numbered result list for TERM.
        * ``findpdf <TERM> <NUM> [outfile]`` follows result number NUM, then the
          page's download link, and saves the response to ``outfile`` (or to
          the name produced by ``PdfFinder.make_filename()``).
        * anything else prints the usage line.
        """
        import sys

        usage = 'Usage: findpdf <TERM> <NUM> [outfile] - search for TERM and download NUM'
        args = sys.argv[1:]

        # Validate arguments before constructing PdfFinder: its constructor
        # opens the site, which is pointless when only usage is printed.
        if not args:
            print(usage)
            return

        if len(args) == 1:
            print(PdfFinder().search(args[0]))
            return

        try:
            index = int(args[1])
        except ValueError:
            print('NUM must be an integer, got {!r}'.format(args[1]))
            print(usage)
            return

        finder = PdfFinder()
        finder.search(args[0])
        titles = list(finder.links)
        try:
            chosen = titles[index]
        except IndexError:
            print('NUM out of range: search returned {} results'.format(len(titles)))
            return

        finder.get_link(finder.links[chosen])
        # The file name must be derived while the book page is still loaded,
        # i.e. before following the download link. An empty outfile falls back
        # to the generated name.
        if len(args) >= 3 and args[2]:
            name = args[2]
        else:
            name = finder.make_filename()
        finder.get_link(finder.get_dl_link())
        finder.save_file(name)
        print('saved {} to file'.format(name))


    if __name__ == "__main__":
        main()

  3. jstacoder created this gist Aug 13, 2015.
    76 changes: 76 additions & 0 deletions get_pdfs.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,76 @@
    # requires twill==0.9
    from twill import get_browser
    from pprint import pprint

    class PdfFinder(object):
        """Search it-ebooks.info through a twill browser and fetch a result.

        ``search()`` fills ``links`` with the result links of a title search,
        ``get_link()`` navigates to one of them, ``get_dl_link()`` locates the
        download link on the current page and ``save_file()`` writes the
        current response body to disk.
        """

        # twill browser object returned by get_browser()
        browser = None
        # the browser's underlying ``_browser`` object, used to enumerate links/forms
        _b = None
        # the 'q' control of the first form found on the start page
        _q = None
        # label -> link object for the most recent search; replaced per instance
        links = {}

        # Link labels are cut at this byte sequence, the UTF-8 encoding of the
        # right-arrow character.
        _ARROW = '\xe2\x86\x92'

        def __init__(self):
            """Open http://it-ebooks.info and cache the browser handles."""
            self.browser = get_browser()
            self.browser.go('http://it-ebooks.info')
            self._b = self.browser._browser
            self._q = list(self._b.forms())[0]['q']
            # Per-instance mapping so instances never share the class-level dict.
            self.links = {}

        def _page_links(self):
            """Return ``(label, link)`` pairs for book/search links on the current page."""
            return [
                (link.text.split(self._ARROW)[0].strip(), link)
                for link in self._b.links()
                if 'book/' in link.url or '/search' in link.url
            ]

        def _pop_next_page_link(self):
            """Remove and return the collected link whose last attribute value is 'Next page'.

            Returns None when no such link has been collected.
            """
            for label, link in list(self.links.items()):
                if link.attrs and (link.attrs[-1][1] or '').strip() == 'Next page':
                    del self.links[label]
                    return link
            return None

        def search(self, term):
            """Run a title search for *term* and collect links from every result page.

            Args:
                term: text placed in the form's ``q`` field.

            Returns:
                str: one line per collected link, formatted ``"<index> <label>"``.
                The index is the position of the label in ``self.links``.
            """
            # Local import keeps the class usable without touching file-level imports.
            from collections import OrderedDict

            form = self.browser.get_form(1)
            form.set_value(term, 'q')
            form.set_value(['title'], 'type')
            # NOTE(review): the string 'None' (not the None object) is passed,
            # exactly as before; confirm against twill's submit() semantics.
            self.browser.submit('None')

            # An ordered mapping keeps the printed index tied to page order, so
            # the same number selects the same entry on a later run.
            self.links = OrderedDict(self._page_links())
            next_link = self._pop_next_page_link()
            while next_link is not None:
                self.get_link(next_link)
                self.links.update(self._page_links())
                next_link = self._pop_next_page_link()

            return '\n'.join(
                '{} {}'.format(index, label)
                for index, label in enumerate(self.links)
            )

        def get_link(self, link):
            """Navigate the browser by following *link*."""
            self.browser.follow_link(link)

        def get_links(self):
            """Return the URLs of all links on the current page, one per line."""
            return '\n'.join(str(link.url) for link in self.browser._browser.links())

        def get_dl_link(self):
            """Return the first link on the current page whose URL contains 'filepi'.

            Raises:
                IndexError: if the page has no such link.
            """
            for link in self.browser._browser.links():
                if 'filepi' in link.url:
                    return link
            raise IndexError('no download link found on the current page')

        def save_file(self, name):
            """Write the current response body (``browser.result.page``) to file *name*."""
            # The context manager closes the file even if the write fails.
            with open(name, 'wb') as out:
                out.write(self.browser.result.page)

        def make_filename(self):
            """Derive a lowercase file name from the current page title.

            The title must have the form ``<name> - <ignored> - <ext>``. It is
            split from the right so hyphens inside ``<name>`` survive and are
            then replaced by underscores.

            Returns:
                str: ``<name>.<ext>`` lowercased, spaces/hyphens in the name
                replaced by ``_`` and commas removed.

            Raises:
                ValueError: if the title has fewer than three ``-`` separated parts.
            """
            title = self.browser.get_title()
            name, _ignored, ext = [part.strip() for part in title.rsplit('-', 2)]
            safe_name = name.replace(' ', '_').replace('-', '_').replace(',', '')
            return str(safe_name + '.' + ext).lower()


    def main():
        """Command-line entry point: list search results or download one.

        Reads ``sys.argv``:

        * ``findpdf <TERM>`` prints the numbered result list for TERM.
        * ``findpdf <TERM> <NUM> [outfile]`` follows result number NUM, then the
          page's download link, and saves the response to ``outfile`` (or to
          the name produced by ``PdfFinder.make_filename()``).
        * anything else prints the usage line.
        """
        import sys

        usage = 'Usage: findpdf <TERM> <NUM> [outfile] - search for TERM and download NUM'
        args = sys.argv[1:]

        # Validate arguments before constructing PdfFinder: its constructor
        # opens the site, which is pointless when only usage is printed.
        if not args:
            print(usage)
            return

        if len(args) == 1:
            print(PdfFinder().search(args[0]))
            return

        try:
            index = int(args[1])
        except ValueError:
            print('NUM must be an integer, got {!r}'.format(args[1]))
            print(usage)
            return

        finder = PdfFinder()
        finder.search(args[0])
        titles = list(finder.links)
        try:
            chosen = titles[index]
        except IndexError:
            print('NUM out of range: search returned {} results'.format(len(titles)))
            return

        finder.get_link(finder.links[chosen])
        # The file name must be derived while the book page is still loaded,
        # i.e. before following the download link. An empty outfile falls back
        # to the generated name.
        if len(args) >= 3 and args[2]:
            name = args[2]
        else:
            name = finder.make_filename()
        finder.get_link(finder.get_dl_link())
        finder.save_file(name)
        print('saved {} to file'.format(name))


    if __name__ == "__main__":
        main()