Last active
January 13, 2016 05:28
-
-
Save jstacoder/d9ef555fb7ef5498c709 to your computer and use it in GitHub Desktop.
Revisions
-
jstacoder revised this gist
Aug 14, 2015. 1 changed file with 9 additions and 4 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,5 @@ # requires twill==0.9 import sys from twill import get_browser from pprint import pprint @@ -50,10 +51,15 @@ def save_file(self,name): def make_filename(self): name,junk,ext = map(str.strip,self.browser.get_title().split('-')) return str(name.replace(' ','_').replace('-','_').replace(',','') + '.' + ext).lower() def print_usage(): print 'Usage: findpdf <TERM> <NUM> [outfile] - search for TERM and download NUM' sys.exit() def main(): finder = PdfFinder() if any(filter(lambda x: x=='-h' or x=='--help',sys.argv)): print_usage() if len(sys.argv) == 2: print finder.search(sys.argv[1]) elif len(sys.argv) >= 3: @@ -64,7 +70,6 @@ def main(): finder.save_file(name) print 'saved {} to file'.format(name) else: print_usage() if __name__ == "__main__": main() -
jstacoder revised this gist
Aug 14, 2015. 1 changed file with 0 additions and 6 deletions. There are no files selected for viewing.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -14,7 +14,6 @@ def __init__(self): self._b = self.browser._browser self._q = list(self._b.forms())[0]['q'] def search(self,term): self.browser.get_form(1).set_value(term,'q') self.browser.get_form(1).set_value(['title'],'type') @@ -52,25 +51,20 @@ def make_filename(self): name,junk,ext = map(str.strip,self.browser.get_title().split('-')) return str(name.replace(' ','_').replace('-','_').replace(',','') + '.' + ext).lower() def main(): finder = PdfFinder() import sys if len(sys.argv) == 2: print finder.search(sys.argv[1]) elif len(sys.argv) >= 3: finder.search(sys.argv[1]) finder.get_link(finder.links[finder.links.keys()[int(sys.argv[2])]]) name = (len(sys.argv) == 4) and sys.argv[-1] or finder.make_filename() finder.get_link(finder.get_dl_link()) finder.save_file(name) print 'saved {} to file'.format(name) else: print 'Usage: findpdf <TERM> <NUM> [outfile] - search for TERM and download NUM' if __name__ == "__main__": main() -
jstacoder created this gist
Aug 13, 2015. There are no files selected for viewing.
# requires twill==0.9
"""Search it-ebooks.info by title and download one of the results.

Usage: findpdf <TERM> <NUM> [outfile]

With only TERM, the numbered list of matching titles is printed.  With TERM
and NUM, result number NUM is downloaded to ``outfile`` (or to a name derived
from the book page title when ``outfile`` is omitted).

The code targets Python 2.7 (twill 0.9); ``print`` is always called with a
single parenthesised argument so the statements behave the same there and
remain valid syntax on Python 3.
"""
import sys
from collections import OrderedDict
from pprint import pprint

try:
    from twill import get_browser
except ImportError:
    # Keep the module importable (and the usage/help path working) without
    # twill; the failure is reported when a PdfFinder is actually created.
    get_browser = None

# Byte sequence that separates a link label from its trailing decoration;
# these are the UTF-8 bytes of U+2192 (rightwards arrow) in a Python 2 str.
_ARROW = '\xe2\x86\x92'

USAGE = 'Usage: findpdf <TERM> <NUM> [outfile] - search for TERM and download NUM'


class PdfFinder:
    """Thin wrapper around a twill browser pointed at it-ebooks.info.

    Attributes:
        browser: the twill browser returned by ``get_browser()``.
        _b: ``browser._browser``, the underlying browser object whose
            ``forms()`` / ``links()`` are read directly.
        _q: the ``'q'`` control of the first form on the start page (stored
            by ``__init__``; not read anywhere else in this file).
        links: mapping of link label -> link object from the last search,
            in the order the links were encountered.
    """

    browser = None
    _b = None
    _q = None
    # Class-level default kept for backward compatibility; __init__ and
    # search() always rebind a per-instance mapping, so it is never mutated.
    links = {}

    def __init__(self):
        """Open the site's start page and remember its search field.

        Raises:
            ImportError: if twill is not installed.
        """
        if get_browser is None:
            raise ImportError('twill==0.9 is required to create a PdfFinder')
        self.browser = get_browser()
        self.browser.go('http://it-ebooks.info')
        self._b = self.browser._browser
        self._q = list(self._b.forms())[0]['q']
        self.links = OrderedDict()

    def _collect_links(self):
        """Return label -> link for the book/search links on the current page.

        Only links whose URL contains ``'book/'`` or ``'/search'`` are kept.
        The label is the link text up to the first arrow, stripped.  Page
        order is preserved so result numbers are stable between runs.
        """
        return OrderedDict(
            (link.text.split(_ARROW)[0].strip(), link)
            for link in self._b.links()
            if 'book/' in link.url or '/search' in link.url
        )

    def _has_next_page(self):
        """Tell whether any collected link's last attribute value is 'Next page'."""
        return any(
            link.attrs[-1][1].strip() == 'Next page'
            for link in self.links.values()
        )

    def search(self, term):
        """Search titles for ``term`` and collect links from every result page.

        Fills ``self.links`` and returns the results as text, one
        ``'<index> <label>'`` line per collected link.

        Raises:
            KeyError: if a 'Next page' link is present but is not stored
                under the label ``'Next'``.
        """
        self.browser.get_form(1).set_value(term, 'q')
        self.browser.get_form(1).set_value(['title'], 'type')
        # NOTE(review): the literal string 'None' (not the None object) is
        # what the original passed to submit(); kept as-is - confirm against
        # the twill 0.9 browser API.
        self.browser.submit('None')
        self.links = self._collect_links()
        while self._has_next_page():
            # Follow the pagination link and merge in the next page's links;
            # the 'Next' entry is removed so it only reappears if the new
            # page has one too.
            self.get_link(self.links.pop('Next'))
            self.links.update(self._collect_links())
        return '\n'.join(
            '{} {}'.format(index, label)
            for index, label in enumerate(self.links)
        )

    def get_link(self, link):
        """Make the browser follow ``link``."""
        self.browser.follow_link(link)

    def get_links(self):
        """Return the URL of every link on the current page, one per line."""
        return '\n'.join(str(link.url) for link in self.browser._browser.links())

    def get_dl_link(self):
        """Return the first link on the current page whose URL contains 'filepi'.

        Raises:
            IndexError: if the page has no such link.
        """
        for link in self.browser._browser.links():
            if 'filepi' in link.url:
                return link
        raise IndexError("no link containing 'filepi' on the current page")

    def save_file(self, name):
        """Write the body of the browser's current result to the file ``name``."""
        # The context manager closes the handle even when the write fails.
        with open(name, 'wb') as out:
            out.write(self.browser.result.page)

    def make_filename(self):
        """Build a lower-case file name from the current page title.

        The title is read as ``<name> - <ignored> - <ext>``; it is split from
        the right so that hyphens inside ``<name>`` are kept (and then turned
        into underscores) instead of breaking the unpacking.
        NOTE(review): assumes the extension is always the last hyphen-separated
        field of the title - confirm against the site's page titles.

        Raises:
            ValueError: if the title has fewer than three hyphen-separated parts.
        """
        name, _ignored, ext = [
            part.strip() for part in self.browser.get_title().rsplit('-', 2)
        ]
        stem = name.replace(' ', '_').replace('-', '_').replace(',', '')
        return str(stem + '.' + ext).lower()


def print_usage():
    """Print the command-line usage line and exit."""
    print(USAGE)
    sys.exit()


def main():
    """Command-line entry point; see the module docstring for the arguments."""
    args = sys.argv[1:]
    # Validate arguments before creating the finder, which opens the site.
    if not args or '-h' in args or '--help' in args:
        print_usage()

    finder = PdfFinder()
    if len(args) == 1:
        print(finder.search(args[0]))
        return

    finder.search(args[0])
    labels = list(finder.links)
    try:
        chosen = labels[int(args[1])]
    except (ValueError, IndexError):
        sys.exit('NUM must be the number of one of the {} search results'.format(len(labels)))
    finder.get_link(finder.links[chosen])

    # The default name comes from the book page title, so it has to be
    # computed before the download link is followed.  An empty outfile
    # falls back to the default, as it did with the old and/or expression.
    if len(args) == 3 and args[2]:
        name = args[2]
    else:
        name = finder.make_filename()
    finder.get_link(finder.get_dl_link())
    finder.save_file(name)
    print('saved {} to file'.format(name))


if __name__ == "__main__":
    main()