skmezanul · December 30, 2019 18:50 · Dec 30, 2019 · Dec 30, 2019 · Dec 30, 2019 · Nov 11, 2019
diff --git a/calisuck.py b/calisuck.py
@@ -682,20 +682,6 @@ def download_ebooks(dir= 'my_books', formats=[], single_format=False, ignored_fo
     print()
     print("Reporting ...")
 
-    print()
-    table = BeautifulTable()
-    table.column_headers = ["", "Total count"]
-    table.append_row(["Formats", total_format_count])
-    table.append_row(["Ebooks", total_ebook_count])
-    print(table)
-
-    print()
-    table = BeautifulTable()
-    table.column_headers = ["", "Size"]
-    table.append_row(["Min", hsize(size_min)])
-    table.append_row(["Max", hsize(size_max)])
-    table.append_row(["Total",  hsize(total_size)])
-    print(table)
 
     print()
     print("Total ebooks updated by language:")
@@ -729,6 +715,21 @@ def download_ebooks(dir= 'my_books', formats=[], single_format=False, ignored_fo
         table.append_row([f, hsize(s)])
     print(table)
 
+    print()
+    table = BeautifulTable()
+    table.column_headers = ["", "Total count"]
+    table.append_row(["Formats", total_format_count])
+    table.append_row(["Ebooks", total_ebook_count])
+    print(table)
+
+    print()
+    table = BeautifulTable()
+    table.column_headers = ["", "Size"]
+    table.append_row(["Min", hsize(size_min)])
+    table.append_row(["Max", hsize(size_max)])
+    table.append_row(["Total",  hsize(total_size)])
+    print(table)
+
     print()
     print("Done !!!")
 

diff --git a/calisuck.py b/calisuck.py
@@ -1,3 +1,44 @@
+#!/usr/bin/env python3
+
+'''
+calisuck: index, filter-out smartly and download ebooks from Calibre open directories
+
+Installation:
+
+    You need Python 3.6 or later installed (the script uses f-strings)
+
+    Download the file as a zip, unzip it, and change into the directory
+
+    OR
+
+    > git clone https://gist.github.com/b7e814d7189db9ee1d6b9c1d1a1de95c.git
+    > mv b7e814d7189db9ee1d6b9c1d1a1de95c calisuck
+    > cd calisuck
+    > 
+
+    THEN
+
+    > python3 -m venv .
+    > . bin/activate
+    > pip install requests fire humanize langid iso639 beautifultable
+    > python calisuck.py --help
+    > python calisuck.py index-ebooks --help
+    > python calisuck.py download-ebooks --help
+    > python calisuck.py download-covers --help
+'''
+
+'''
+   DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 
+                    Version 2, December 2004 
+ Copyright (C) 2004 Sam Hocevar <sam@hocevar.net> 
+ Everyone is permitted to copy and distribute verbatim or modified 
+ copies of this license document, and changing it is allowed as long 
+ as the name is changed. 
+            DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE 
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 
+  0. You just DO WHAT THE FUCK YOU WANT TO.
+'''
+
 import sys
 import os
 import time
@@ -223,7 +264,7 @@ def index_ebooks(site, library="", start=0, stop=0, dir="my_books", inc=1000, fo
     --library=<string>  (default=my_books)  :   Id of library to index. The script index the default library by default.
                                                 The id is string following '&library_id=' in the url
 
-    --force_refresh     (defaul=False)      :   Force a refresh of the metadata. By default all the metdata 
+    --force-refresh     (default=False)     :   Force a refresh of the metadata. By default all the metadata 
                                                 already gathered are ignored
 
     --start=<int>       (default=0)
@@ -236,8 +277,8 @@ def index_ebooks(site, library="", start=0, stop=0, dir="my_books", inc=1000, fo
 
     offset= 0 if not start else start-1
     num=min(1000,inc)
-    server=site
-    api=server+'ajax/'
+    server=site.rstrip('/')
+    api=server+'/ajax/'
     library= '/'+library if library else library 
 
     print("Server:", server)
@@ -460,11 +501,53 @@ def has_identifiers(book, identifiers=[], ignore_empty_identifiers=False):
     return True
 
 def download_ebooks(dir= 'my_books', formats=[], single_format=False, ignored_formats=[], languages=[], identifiers=[], min_size=0, max_size=0, ignore_empty_language=False, ignore_empty_identifiers=False, dry_run=False, map="", map_lib=""):
+    '''
+    Download ebooks in matching subdirs:
+    
+    The different formats of the same book are grouped in the same directory,
+    named with a UUID, alongside the metadata file (metadata.json).
+
+    The status of the formats for a book and its global status are initially set to 'todo'.
+    They move to 'done' after their download. This allows you to rerun the download and progressively collect books. 
+
+    You can use different options to filter the formats for the download 
+    by language, size, format and identifiers(isbn, ...).  
+
+    A report of the download is displayed at the end of the process. 
+    You can run this command in dry mode (--dry-run) with different settings 
+    to only display the report and prepare the actual download.
+
+    Params:
+
+    --min-size=<int>            (default=0)
+    --max-size=<int>            (default=infinity)  :   Delimit the size in MB for the accepted formats    
+    --dry-run                   (default=False)     :   Run the command to simulate the download
+    --languages=<string>                            :   Restrict the download to a list of specific languages
+                                                        (Ex: --languages='["eng","ita"]')
+    --ignore-empty-language     (default=False)     :   Ignore books with unidentified language
+    --formats=<string>                              :   Restrict the download to a list of specific formats
+                                                        (Ex: --formats='["epub", "mobi", "pdf"]')
+    --ignored-formats=<string>                      :   Ignore a list of specific formats.
+                                                        Compatible with --formats.
+                                                        (Ex: --ignored-formats='["mp3", "rar", "zip"]')
+    --single-format             (default=False)     :   Limit the download to 1 format per book with this preference order
+                                                        'azw', 'azw3', 'cbr', 'chm', 'djvu', 'doc', 'docx', 'epub', 'kepub',
+                                                        'lit', 'lrf', 'mobi', 'original_epub', 'pdf', 'ppt', 'prc', 'rar'
+                                                        , 'rtf', 'txt', 'zip', 'fb2'
+    --identifiers=<string>                          :   Restrict the download to a list of specific identifiers 
+                                                        (Ex: --identifiers='["isbn","asin"]')
+    --ignore-empty-identifiers  (default=False)     :   Ignore books without identifiers (often OCR)
+    '''
+
+
+
     # all_ordered_formats=['azw', 'azw3', 'cbr', 'chm', 'djvu', 'doc', 'docx', 'epub', 'lit', 'lrf', 'mobi', 'original_epub', 'pdf', 'ppt', 'prc', 'rar', 'rtf', 'txt', 'zip']        
 
+    print()
+
     if single_format: my_formats = formats if formats else all_ordered_formats 
     else: my_formats=formats
-    print("formats=", my_formats)
+    # print("formats=", my_formats)
 
     min_size=int(min_size)*1024*1024
     max_size=int(max_size)*1024*1024
@@ -521,7 +604,7 @@ def download_ebooks(dir= 'my_books', formats=[], single_format=False, ignored_fo
                                 # print("Format '{}': size expected={}".format(f, hsize(source['formats'][f]['size'])))
                                     pass
 
-                                print(f"--> format '{f}' for ({book['title']} / {book['authors'][0]} / {str(book['series'])})")
+                                # print(f"--> format '{f}' for ({book['title']} / {book['authors'][0]} / {str(book['series'])})")
                                 if not dry_run:    
                                     try:
                                         get_file(dir, book, f, s, map, map_lib)

diff --git a/calisuck.py b/calisuck.py
@@ -9,16 +9,19 @@
 from humanize import naturalsize as hsize
 from langid.langid import LanguageIdentifier, model
 import iso639
-import pickle
 import time
 from requests.adapters import HTTPAdapter
 import urllib.parse
 import urllib3
+from beautifultable import BeautifulTable
+
+
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
 all_ordered_formats=['azw', 'azw3', 'cbr', 'chm', 'djvu', 'doc', 'docx', 'epub', 'kepub', 'lit', 'lrf', 'mobi', 'original_epub', 'pdf', 'ppt', 'prc', 'rar', 'rtf', 'txt', 'zip', 'fb2']        
 identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
 
+
 def load_metadata(path, uuid):
     filepath=path+'/'+uuid+'/metadata.json'
     # print (filepath)
@@ -30,18 +33,19 @@ def load_metadata(path, uuid):
             print ("Error loading metadata for:", uuid, "from path:", path)
             return 0
     else:
-        print ("Metadata not found for:", uuid, "from path:", path)
+        # print ("Metadata not found for:", uuid, "from path:", path)
         return 0        
 
+
 def save_metadata(path, book):
     filepath=path+'/'+book['uuid']+'/metadata.json'
-    print("Saving book metadata for:", book['uuid'], "to:", filepath)
+    # print("Saving book metadata for:", book['uuid'], "to:", filepath)
     os.makedirs(os.path.dirname(filepath+".tmp"), exist_ok=True)
     with open(filepath+".tmp", 'w') as fd:
         json.dump(book, fd, indent=4, separators=(',', ': '))
     try:
         shutil.move(filepath+".tmp", filepath)
-        print("Saved to:", filepath)
+        # print("Saved to:", filepath)
     except:
         print("Unable to rename .tmp file:", filepath+".tmp")
 
@@ -51,6 +55,7 @@ def get_cover_path(path, uuid):
     if os.path.isfile(filepath): return filepath
     else: return 0
 
+
 def get_file_path(path, uuid, fileformat):
     files=os.listdir(path+'/'+uuid)
     if files:
@@ -83,7 +88,9 @@ def get_cover(path, book, map):
         print("Saved to:", filepath)
 
 
-def download_covers(dir= '.', server='', map=""):
+def download_covers(dir='my_books', server='', map=""):
+    """ Download the cover of each book"""
+
     for root, dirs, files in os.walk(dir, topdown=True):
         for d in dirs:
             # print() 
@@ -108,6 +115,7 @@ def download_covers(dir= '.', server='', map=""):
             else:
                 print ("No ebook metadata found in:", root)
 
+
 def get_file_size(url):
     print("Downloading size:", url)
     r = requests.head(url, verify=False)
@@ -116,6 +124,7 @@ def get_file_size(url):
     print("Size received="+ hsize(size))
     return int(size)
 
+
 def get_file(path, book, format, session, map, map_lib):
     uuid = book['uuid']
     url=book['source']['formats'][format]['url']
@@ -173,7 +182,6 @@ def set_status(uuid, status, dir='.'):
             print("Status unchanged changed ", status+":", book['uuid'])
     else:
         print ("No ebook metadata found for:", uuid)
-
 
 
 def remove_book(uuid, path='.'):
@@ -189,36 +197,6 @@ def remove_book(uuid, path='.'):
         print(uuid, "not found")
 
 
-
-def explore(site, help=False):
-    server=site
-    api=server+'ajax/'
-    print("Server:", server)
-    url=api+'library-info'
-    print()
-    print("Getting libraries:", server)
-    print(url)
-    try:
-        r = requests.get(url)
-        r.raise_for_status()
-    except:
-        print("Unable to open site:", url)
-        sys.exit(1)
-
-    libraries = r.json()["library_map"].keys()
-    print("Libraries:")    
-    for l in libraries:
-        library='/'+l
-        url=api+'search'+library+'?num=0'
-        try:
-            r = requests.get(url)
-            r.raise_for_status()
-        except:
-            print("Unable to open site:", url)
-            continue
-        print("\t{}: {} ebooks".format(l, r.json()["total_num"]))
-
-
 def update_done_status(book):
     source=book['source']
     if source['status']!='ignored':
@@ -228,19 +206,44 @@ def update_done_status(book):
             book['source']['status']="todo"
 
 
-def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False):
+def index_ebooks(site, library="", start=0, stop=0, dir="my_books", inc=1000, force_refresh=False):
+    """
+    Index a remote Calibre library
+
+    You will get in your <dir> all the metadata (title, authors, isbn, ...) for each book. 
+    They're stored as simple JSON files (metadata.json) so that you can easily visualize them or process them with 'jq' program.
+    They are stored in subdirectories with a UUID as a name. These directories do match different books and allow you to group all 
+    the different formats of the same book and eventually the cover file.
+    You can mix books from different sites without any (theoretical) collisions
+
+    Params:
+
+    --site=<string>                         :   Url of the site to index (ex: http://123.123.123.123/)
+
+    --library=<string>  (default=my_books)  :   Id of library to index. The script index the default library by default.
+                                                The id is string following '&library_id=' in the url
+
+    --force_refresh     (defaul=False)      :   Force a refresh of the metadata. By default all the metdata 
+                                                already gathered are ignored
+
+    --start=<int>       (default=0)
+    --stop=<int>        (default=0)         :   Allow indexing between a range of ebooks
+    
+    --inc=<int>         (default=1000)      :   Set the number of ebooks fetched per request to the server (capped at 1000)
+    """
+
+    os.makedirs(dir, exist_ok=True)
+
     offset= 0 if not start else start-1
-    num=1000
+    num=min(1000,inc)
     server=site
     api=server+'ajax/'
-    #api=server+'calibre/ajax/'
     library= '/'+library if library else library 
 
     print("Server:", server)
     url=api+'search'+library+'?num=0'
     print()
     print("Getting ebooks count:", server)
-    print(url)
     try:
         r = requests.get(url,verify=False)
         r.raise_for_status()
@@ -251,63 +254,63 @@ def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False
     total_num=int(r.json()["total_num"])
     total_num= total_num if not stop else stop
 
+    print()
+    print("Start indexing")
+
     range=offset+1
     while offset < total_num:
         remaining_num = min(num, total_num - offset)
-        print()
-        print("Downloading ids: offset="+str(offset), "num="+str(remaining_num))
-        # url=api+'search?num='+str(remaining_num)+'&offset='+str(offset)
+        # print()
+        # print("Downloading ids: offset="+str(offset), "num="+str(remaining_num))
         url=api+'search'+library+'?num='+str(remaining_num)+'&offset='+str(offset)+'&sort=timestamp&sort_order=desc'
 
-        print("->", url)
+        # print("->", url)
         r=requests.get(url, verify=False)
-        print("Ids received from:"+str(offset), "to:"+str(offset+remaining_num-1))
+        # print("Ids received from:"+str(offset), "to:"+str(offset+remaining_num-1))
 
-        print()
-        print("Downloading metadata from", str(offset+1), "to", str(offset+remaining_num))
+        # print()
+        # print("\rDownloading metadata from", str(offset+1), "to", str(offset+remaining_num),end='')
         books_s=",".join(str(i) for i in r.json()['book_ids'])
         url=api+'books'+library+'?ids='+books_s
-        print("->", url)
+        # print("->", url)
         r=requests.get(url, verify=False)
-        print(len(r.json()), "received")
+        # print(len(r.json()), "received")
 
-        for id in r.json().keys():                
-                print()
-                print ('--> range={}/{}'.format(str(range),str(total_num)))
-                r_book=r.json()[id]
+        for id, r_book in r.json().items():                
                 uuid=r_book['uuid']
                 if not uuid:
                     print ("No uuid for ebook: ignored")
                     continue 
-                # print ('\r--> range={}/{}'.format(str(range),str(total_num)), "uuid="+uuid, "("+r.json()[id]['title']+")", end='')
-                # print (r.json()[id])
-                # title= r.json()[id]['title'] if 'title' in r.json()[id] else "<untitled>"
+
                 if r_book['authors']:
-                    print("uuid="+uuid, "("+r_book['title']+" -- "+r_book['authors'][0]+")")
+                    desc= f"uuid={uuid} ({r_book['title']} / {r_book['authors'][0]})"
                 else:
-                    print("uuid="+uuid, "("+r_book['title']+")")
+                    desc= f"uuid={uuid} ({r_book['title']})"
+                s=f"\r--> {range}/{total_num} - {desc}"
+                s='{:140.140}'.format(s)
+                print (s, end='')
 
                 if not force_refresh:
-                    # print("Checking local metadata:", uuid)
                     try:
                         book = load_metadata(dir, uuid)
                     except:
+                        print()
                         print("Unable to get metadata from:", uuid)
                         range+=1
                         continue
                     if book:
-                        print("Metadata already present for:", uuid)
+                        # print("Metadata already present for:", uuid)
                         range+=1
                         continue
 
-                if not r.json()[id]['formats']:
+
+                if not r_book['formats']:
+                    print()
                     print("No format found for {}".format(r_book['uuid']))
                     range+=1
                     continue
 
 
-                print("Analyzing for:", uuid)
-
                 book={}
                 url=api+'book/'+id
                 book['title']=r_book['title']
@@ -323,17 +326,13 @@ def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False
                 languages=r_book['languages']
                 if not languages:
                 # if True:
-                    # pass
-                    print ("Analyzing languages")
                     if book['comments']:
                         text=book['comments']
                     else:
                         text=book['title']
                     s_language, prob=identifier.classify(text)
-                    print (s_language, prob)
                     if prob >= 0.85:
                         language =  iso639.to_iso639_2(s_language)
-                        print("language=", language)
                         book['languages']=[language]
                     else:
                         book['languages']=[]
@@ -362,7 +361,7 @@ def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False
                 source['timestamp']=r_book['timestamp']
 
                 format_sources={}
-                formats=r.json()[id]['formats']
+                formats=r_book['formats']
                 for f in formats:
                     s={}    
                     url=''
@@ -375,6 +374,7 @@ def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False
                     if 'size' in r_book['format_metadata'][f]:
                         s['size']=int(r_book['format_metadata'][f]['size'])
                     else:
+                        print()
                         print("Size not found for format '{}' : {}".format(f, uuid))
                         print("Trying to get size online: {}".format(s['url']))
                         try:
@@ -387,76 +387,79 @@ def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False
 
                 source['formats']=format_sources
                 book['source']=source
-                print("Analyzed:", uuid)
 
 
                 if not source['formats']:
-                    print("No format found for {}".format(r.json()[id]['uuid']))
+                    print("No format found for {}".format(r_book['uuid']))
                     range+=1
                     continue
                 update_done_status(book)
-                print("Saving metadata for:", uuid)
+                # print("Saving metadata for:", uuid)
                 try:
                     save_metadata(dir, book)
                 except:
+                    print()
                     print("Unable to save book metadata", book['uuid'])
                 range+=1
         offset=offset+num
-
+    print()
+    print("Done")
 
 
 def has_languages(book, languages=[], ignore_empty_language=False):
 
-    print("Accepted languages", languages)
+    # print("Accepted languages", languages)
     if not ignore_empty_language:
-            print("Unknown language accepted")
+            # print("Unknown language accepted")
+            pass
 
     # rustine
     if not 'languages' in book:        
         book['languages']=[]
 
-    print("Book languages", book['languages'])
+    # print("Book languages", book['languages'])
 
     if ignore_empty_language and not book['languages']:
-        print ("'{}' ignored: language is empty".format(book['uuid']))
+        # print ("'{}' ignored: language is empty".format(book['uuid']))
         return False
 
     if not ignore_empty_language and not book['languages']:
-        print ("'{}' todo: language is empty".format(book['uuid']))
+        # print ("'{}' todo: language is empty".format(book['uuid']))
         return True
 
     expected_languages=list(set(book['languages']) & set(languages))
     if languages and not expected_languages:
-        print ("'{}' ignored: language {} not in {}".format(book['uuid'], book['languages'],languages))
+        # print ("'{}' ignored: language {} not in {}".format(book['uuid'], book['languages'],languages))
         return False
 
-    print ("'{}' todo: expected languages {}".format(book['uuid'], expected_languages))
+    # print ("'{}' todo: expected languages {}".format(book['uuid'], expected_languages))
     return True
 
 def has_identifiers(book, identifiers=[], ignore_empty_identifiers=False):
 
-    print("Accepted identifiers", identifiers)
+    # print("Accepted identifiers", identifiers)
     if not ignore_empty_identifiers:
-            print("Unknown identifiers accepted")
-    print("Book identifiers", book['identifiers'].keys())
+            # print("Unknown identifiers accepted")
+            pass
+    # print("Book identifiers", book['identifiers'].keys())
 
     if ignore_empty_identifiers and not book['identifiers']:
-        print ("'{}' ignored: identifier is empty".format(book['uuid']))
+        # print ("'{}' ignored: identifier is empty".format(book['uuid']))
         return False
 
     if not ignore_empty_identifiers and not book['identifiers']:
-        print ("'{}' todo: identifiers is empty".format(book['uuid']))
+        # print ("'{}' todo: identifiers is empty".format(book['uuid']))
         return True
 
     expected_identifiers=list(set(book['identifiers'].keys()) & set(identifiers))
     if identifiers and not expected_identifiers:
-        print ("'{}' ignored: identifiers {} not in {}".format(book['uuid'], book['identifiers'].keys(), identifiers))
+        # print ("'{}' ignored: identifiers {} not in {}".format(book['uuid'], book['identifiers'].keys(), identifiers))
         return False
 
-    print ("'{}' todo: expected identifiers {}".format(book['uuid'], expected_identifiers))
+    # print ("'{}' todo: expected identifiers {}".format(book['uuid'], expected_identifiers))
     return True
 
-def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignored_formats=[], languages=[], identifiers=[], min_size=0, max_size=0, ignore_empty_language=False, ignore_empty_identifiers=False, dry_run=False, map="", map_lib=""):
+def download_ebooks(dir= 'my_books', formats=[], single_format=False, ignored_formats=[], languages=[], identifiers=[], min_size=0, max_size=0, ignore_empty_language=False, ignore_empty_identifiers=False, dry_run=False, map="", map_lib=""):
     # all_ordered_formats=['azw', 'azw3', 'cbr', 'chm', 'djvu', 'doc', 'docx', 'epub', 'lit', 'lrf', 'mobi', 'original_epub', 'pdf', 'ppt', 'prc', 'rar', 'rtf', 'txt', 'zip']        
 
     if single_format: my_formats = formats if formats else all_ordered_formats 
@@ -467,8 +470,6 @@ def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignore
     max_size=int(max_size)*1024*1024
     print ("Format expected between {} and {}".format(hsize(min_size), hsize(max_size) if max_size else "infinity"))
 
-    # sys.exit()
-
     total_size=0
     total_size_by_format={}
     total_ebook_count=0
@@ -488,9 +489,6 @@ def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignore
             if book:
                 status=book['source']['status']
                 if status=="todo":
-                    print()
-                    print()
-                    print("-->", uuid, "("+book['title']+" -- "+book['authors'][0]+" -- serie: "+ str(book['series'])+")")
 
                     if not has_languages(book, languages=languages, ignore_empty_language=ignore_empty_language):
                         continue
@@ -501,8 +499,9 @@ def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignore
                     source=book['source']
                     download_formats = get_formats_to_download(book, accepted_formats=my_formats, single_format=single_format, ignored_formats=ignored_formats, max_size=max_size, min_size=min_size)
                     if not len(download_formats):
-                        print ("'{}' ignored: no more format available in formats expected {}".format(uuid, download_formats))
-                        print()
+                        # print ("'{}' ignored: no more format available in formats expected {}".format(uuid, download_formats))
+                        # print()
+                        pass
                     else:
                         ebook_kept=False
                         for f in download_formats:
@@ -515,19 +514,20 @@ def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignore
                             if url:
                                 # # It shouldn't occur: Need to download again
                                 if get_file_path(dir, uuid, f):
-                                    print ("Format '{}' already present for {}: Retrying".format(f, uuid))
-                                    print()
+                                    # print ("Format '{}' already present for {}: Retrying".format(f, uuid))
+                                    # print()
                                 #     continue
 
-                                print("Format '{}': size expected={}".format(f, hsize(source['formats'][f]['size'])))
-
+                                # print("Format '{}': size expected={}".format(f, hsize(source['formats'][f]['size'])))
+                                    pass
+
+                                print(f"--> format '{f}' for ({book['title']} / {book['authors'][0]} / {str(book['series'])})")
                                 if not dry_run:    
                                     try:
                                         get_file(dir, book, f, s, map, map_lib)
                                         book['formats'].append(f)
                                         book['source']['formats'][f]['status']="done"
                                         time.sleep(0)
-                                    # except:
                                     except Exception as msg:
                                         print("Unable to get book:", url)
                                         print(msg)
@@ -553,8 +553,9 @@ def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignore
                                     total_count_by_format[f]+=1
                                 total_format_count +=1
                             else:    
-                                print ("Format '{}' ignored for {} ({}): No url)".format(f, uuid, book['title']))
-                                print()
+                                # print ("Format '{}' ignored for {} ({}): No url)".format(f, uuid, book['title']))
+                                # print()
+                                pass
                         if ebook_kept:
                             total_ebook_count+=1
                             if not book['languages']:
@@ -596,36 +597,68 @@ def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignore
                     print(f'--> {counter} books handled', end="\r")
 
     print()
-    print("Total count of updated ebooks:", total_ebook_count)
+    print("Reporting ...")
+
+    print()
+    table = BeautifulTable()
+    table.column_headers = ["", "Total count"]
+    table.append_row(["Formats", total_format_count])
+    table.append_row(["Ebooks", total_ebook_count])
+    print(table)
+
+    print()
+    table = BeautifulTable()
+    table.column_headers = ["", "Size"]
+    table.append_row(["Min", hsize(size_min)])
+    table.append_row(["Max", hsize(size_max)])
+    table.append_row(["Total",  hsize(total_size)])
+    print(table)
+
+    print()
     print("Total ebooks updated by language:")
+    table = BeautifulTable()
+    table.column_headers = ["Language", "Ebooks count"]
     for l, c in language_count.items():
-        print("   '{}': {}".format(l, c))
+        table.append_row([l, c])
+    print(table)
+
+    print()
     print("Total ebooks updated by identifiers:")
-    for l, c in identifiers_count.items():
-        print("   '{}': {}".format(l, c))
-    print("Total count of formats:", total_format_count)
+    table = BeautifulTable()
+    table.column_headers = ["Identifiers", "Ebooks count"]
+    for i, c in identifiers_count.items():
+        table.append_row([i, c])
+    print(table)
+
+    print()
     print("Total count of ebooks by format:")
+    table = BeautifulTable()
+    table.column_headers = ["Formats", "Ebooks count"]
     for f, c in total_count_by_format.items():
-        print("\t'{}': {}".format(f, c))
+        table.append_row([f, c])
+    print(table)
+
     print()
-    print("Total size:", hsize(total_size))
-    print("Maximum file size:", hsize(size_max))
-    print("Minimum file size:", hsize(size_min))
     print("Total size by format:")
+    table = BeautifulTable()
+    table.column_headers = ["Format:", "Size"]
     for f, s in total_size_by_format.items():
-        print("\t'{}': {}".format(f, hsize(s)))
+        table.append_row([f, hsize(s)])
+    print(table)
 
+    print()
+    print("Done !!!")
 
 
 def get_formats_to_download(book, accepted_formats=[], ignored_formats=[], single_format=False, min_size=0, max_size=0):
-    print("Accepted formats", accepted_formats)
+    # print("Accepted formats", accepted_formats)
     source=book['source']
-    print("Formats available in source: {}".format(list(source['formats'].keys())))
+    # print("Formats available in source: {}".format(list(source['formats'].keys())))
     my_formats=[]
     for f,v in source['formats'].items():
         if v['status']=='todo':
             my_formats.append(f)
-    print("Formats in 'todo': {}".format(my_formats))
+    # print("Formats in 'todo': {}".format(my_formats))
 
     formats=[]
     if single_format:
@@ -644,53 +677,37 @@ def get_formats_to_download(book, accepted_formats=[], ignored_formats=[], singl
         else:
             formats=my_formats
 
-    print("Formats expected: {}".format(formats))
+    # print("Formats expected: {}".format(formats))
 
     download_formats=formats[:]
     for f in formats:
         if not 'size' in source['formats'][f] and max_size:
-            print ("Format '{}' ignored for {}: Size unknown".format(f, book['uuid']))
+            # print ("Format '{}' ignored for {}: Size unknown".format(f, book['uuid']))
             download_formats.remove(f)
         else:
             size = source['formats'][f]['size']
             if size < min_size or (max_size and size > max_size):
                 download_formats.remove(f)
-                print ("Format '{}' ignored for {}: size={} but expected between {} and {}".format(f, book['uuid'], hsize(size), hsize(min_size), hsize(max_size) if max_size else "infinity"))
+                # print ("Format '{}' ignored for {}: size={} but expected between {} and {}".format(f, book['uuid'], hsize(size), hsize(min_size), hsize(max_size) if max_size else "infinity"))
     return download_formats
 
 
 def update_format_statuses(book,refresh_ignored):
     formats=book['source']['formats']
     for f, v in formats.items():
         if v['status']=='ignored' and not refresh_ignored:
-            print ("Format '{}' ignored: {} ({}))".format(f, book['uuid'], book['title']))
+            # print ("Format '{}' ignored: {} ({}))".format(f, book['uuid'], book['title']))
+            pass
         else:
-            print ("Format '{}' todo: {} ({}))".format(f, book['uuid'], book['title']))
+            # print ("Format '{}' todo: {} ({}))".format(f, book['uuid'], book['title']))
             book['source']['formats'][f]['status']='todo'
 
 
-def reset_ignored(dir= '.', server=''):
-    for root, dirs, files in os.walk(dir, topdown=True):
-        for uuid in dirs:
-            save_ebook=False
-            book = load_metadata(root, uuid)
-            if book:
-                status=book['source']['status']
-                if status=="ignored":
-                    print ("'{}' status 'ignored' reset to 'todo'".format(book['uuid']))
-                    book['source']['status']='todo'
-                    save_ebook=True
-
-                formats=book['source']['formats']
-                for f, v in formats.items():
-                    if v['status']=='ignored':
-                        print ("'{}' format 'ignored' reset to 'todo'".format(book['uuid']))
-                        book['source']['formats'][f]['status']='todo'
-                        save_ebook=True
-
-                if save_ebook:
-                    save_metadata(dir, book)
-
 
 if __name__ == "__main__":
-    fire.Fire()
+    fire.Fire({
+        "index_ebooks": index_ebooks,
+        "download_ebooks": download_ebooks,
+        "download_covers": download_covers,
+        "set_status": set_status
+        })
diff --git a/calisuck.py b/calisuck.py
@@ -16,11 +16,6 @@
 import urllib3
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
-
-#  l=  book.get_items_of_type(ebooklib.ITEM_DOCUMENT)
-# >>> for i in l:
-# ...     print(BeautifulSoup(i.get_content()).text)
-
 all_ordered_formats=['azw', 'azw3', 'cbr', 'chm', 'djvu', 'doc', 'docx', 'epub', 'kepub', 'lit', 'lrf', 'mobi', 'original_epub', 'pdf', 'ppt', 'prc', 'rar', 'rtf', 'txt', 'zip', 'fb2']        
 identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
 

diff --git a/calisuck.py b/calisuck.py
@@ -11,8 +11,15 @@
 import iso639
 import pickle
 import time
+from requests.adapters import HTTPAdapter
+import urllib.parse
+import urllib3
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
 
+#  l=  book.get_items_of_type(ebooklib.ITEM_DOCUMENT)
+# >>> for i in l:
+# ...     print(BeautifulSoup(i.get_content()).text)
 
 all_ordered_formats=['azw', 'azw3', 'cbr', 'chm', 'djvu', 'doc', 'docx', 'epub', 'kepub', 'lit', 'lrf', 'mobi', 'original_epub', 'pdf', 'ppt', 'prc', 'rar', 'rtf', 'txt', 'zip', 'fb2']        
 identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
@@ -59,11 +66,18 @@ def get_file_path(path, uuid, fileformat):
         else: return 0
     else: return 0
 
-def get_cover(path, book):
+
+def get_cover(path, book, map):
     url=book['source']['cover']
+    if map:
+        pu=urllib.parse.urlparse(url)
+        pu=(pu[0], map, *pu[2:])
+        print(pu)
+        url=urllib.parse.urlunparse(pu)
+
     print("Downloading cover from:", url)
 
-    r=requests.get(url, timeout=10)
+    r=requests.get(url, timeout=(20, 3), verify=False)
     r.raise_for_status()
 
     filepath=path+'/'+book['uuid']+'/cover.jpg'
@@ -74,20 +88,21 @@ def get_cover(path, book):
         print("Saved to:", filepath)
 
 
-def download_covers(dir= '.', server=''):
+def download_covers(dir= '.', server='', map=""):
     for root, dirs, files in os.walk(dir, topdown=True):
         for d in dirs:
             # print() 
             # print("-->", d) 
             book = load_metadata(root, d)
             if book:
-                if book['source']['status'] != "ignored":
+                # if book['source']['status'] != "ignored":
+                if True:
                     if not get_cover_path(root, book['uuid']): 
                         print() 
                         print("-->", d) 
                         print(book['uuid'])
                         try:
-                            get_cover(root, book)
+                            get_cover(root, book, map)
                         except:
                             print ("Unable to get cover", book['uuid'])   
                     else:
@@ -100,19 +115,34 @@ def download_covers(dir= '.', server=''):
 
 def get_file_size(url):
     print("Downloading size:", url)
-    r = requests.head(url)
+    r = requests.head(url, verify=False)
     r.raise_for_status()
     size=r.headers['Content-Length']
     print("Size received="+ hsize(size))
     return int(size)
 
-def get_file(path, book, format, session):
+def get_file(path, book, format, session, map, map_lib):
     uuid = book['uuid']
     url=book['source']['formats'][format]['url']
+    if map:
+        pu=urllib.parse.urlparse(url)
+        pu=(pu[0], map, *pu[2:])
+        print(pu)
+        url=urllib.parse.urlunparse(pu)
+
+    if map_lib:
+        # pu=urllib.parse.urlparse(url)
+        # print(pu)
+        url_s=url.split("/")
+        # print(url_s)
+        url_s=url_s[:-1]+[map_lib] 
+        # print('/'.join(url_s))
+        url='/'.join(url_s)
+
 
     print("Downloading ebook:", url)
     print("Size expected (estimation):", hsize(book['source']['formats'][format]['size']))
-    r = session.get(url, timeout=5)
+    r = session.get(url, timeout=(25,15), verify=False)
     # headers = {"Range": "bytes=0-1023"}
     # r = requests.get(url, headers=headers)
     r.raise_for_status()
@@ -122,6 +152,7 @@ def get_file(path, book, format, session):
     else:
         print("Fize received")
 
+
     filename=re.findall(r'filename="(.*)"', r.headers['Content-Disposition'])
     # print(filename)
     if len(filename):
@@ -142,7 +173,7 @@ def set_status(uuid, status, dir='.'):
         if book['source']['status'] != status: 
             book['source']['status'] = status
             save_metadata(dir, book)
-            print("Status changed to", status+":", book['uuid'])
+            print("Status changed to", status+":", book['uuid'], "(", book['title'], ")")
         else:
             print("Status unchanged changed ", status+":", book['uuid'])
     else:
@@ -204,7 +235,7 @@ def update_done_status(book):
 
 def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False):
     offset= 0 if not start else start-1
-    num=500
+    num=1000
     server=site
     api=server+'ajax/'
     #api=server+'calibre/ajax/'
@@ -234,7 +265,7 @@ def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False
         url=api+'search'+library+'?num='+str(remaining_num)+'&offset='+str(offset)+'&sort=timestamp&sort_order=desc'
 
         print("->", url)
-        r=requests.get(url)
+        r=requests.get(url, verify=False)
         print("Ids received from:"+str(offset), "to:"+str(offset+remaining_num-1))
 
         print()
@@ -248,14 +279,18 @@ def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False
         for id in r.json().keys():                
                 print()
                 print ('--> range={}/{}'.format(str(range),str(total_num)))
-                uuid=r.json()[id]['uuid']
+                r_book=r.json()[id]
+                uuid=r_book['uuid']
                 if not uuid:
                     print ("No uuid for ebook: ignored")
                     continue 
                 # print ('\r--> range={}/{}'.format(str(range),str(total_num)), "uuid="+uuid, "("+r.json()[id]['title']+")", end='')
                 # print (r.json()[id])
                 # title= r.json()[id]['title'] if 'title' in r.json()[id] else "<untitled>"
-                print("uuid="+uuid, "("+r.json()[id]['title']+")")
+                if r_book['authors']:
+                    print("uuid="+uuid, "("+r_book['title']+" -- "+r_book['authors'][0]+")")
+                else:
+                    print("uuid="+uuid, "("+r_book['title']+")")
 
                 if not force_refresh:
                     # print("Checking local metadata:", uuid)
@@ -271,26 +306,28 @@ def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False
                         continue
 
                 if not r.json()[id]['formats']:
-                    print("No format found for {}".format(r.json()[id]['uuid']))
+                    print("No format found for {}".format(r_book['uuid']))
                     range+=1
                     continue
 
 
+                print("Analyzing for:", uuid)
+
                 book={}
                 url=api+'book/'+id
-                book['title']=r.json()[id]['title']
-                book['authors']=r.json()[id]['authors']
-                book['series']=r.json()[id]['series']
-                book['series']=r.json()[id]['series']
-                book['series_index']=r.json()[id]['series_index']
+                book['title']=r_book['title']
+                book['authors']=r_book['authors']
+                book['series']=r_book['series']
+                book['series_index']=r_book['series_index']
                 book['edition']=0
-                book['uuid']=r.json()[id]['uuid']
-                book['identifiers']=r.json()[id]['identifiers']
-                book['comments']=r.json()[id]['comments']
-                book['pubdate']=r.json()[id]['pubdate']
-                book['publisher']=r.json()[id]['publisher']
-                languages=r.json()[id]['languages']
+                book['uuid']=r_book['uuid']
+                book['identifiers']=r_book['identifiers']
+                book['comments']=r_book['comments']
+                book['pubdate']=r_book['pubdate']
+                book['publisher']=r_book['publisher']
+                languages=r_book['languages']
                 if not languages:
+                # if True:
                     # pass
                     print ("Analyzing languages")
                     if book['comments']:
@@ -310,11 +347,11 @@ def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False
                     for l in languages:
                         book['languages'].append(iso639.to_iso639_2(l))
 
-                book['tags']=r.json()[id]['tags']
+                book['tags']=r_book['tags']
                 book['formats']=[]
                 book['metadata_version']=0.1
                 source={}
-                source['url']=url
+                source['url']=url+library
                 source['id']=id
                 try:
                     tmpbook = load_metadata(dir, uuid)
@@ -326,22 +363,22 @@ def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False
                     source['status']="ignored"
                 else:
                     source['status']="todo"
-                source['cover']=server+r.json()[id]['cover']  
-                source['timestamp']=r.json()[id]['timestamp']
+                source['cover']=server+r_book['cover']  
+                source['timestamp']=r_book['timestamp']
 
                 format_sources={}
                 formats=r.json()[id]['formats']
                 for f in formats:
                     s={}    
                     url=''
-                    if f in r.json()[id]['main_format']:
-                        url=r.json()[id]['main_format'][f]
+                    if f in r_book['main_format']:
+                        url=r_book['main_format'][f]
                     else:
-                        url=r.json()[id]['other_formats'][f]
+                        url=r_book['other_formats'][f]
                     s['url']=server+url
 
-                    if 'size' in r.json()[id]['format_metadata'][f]:
-                        s['size']=int(r.json()[id]['format_metadata'][f]['size'])
+                    if 'size' in r_book['format_metadata'][f]:
+                        s['size']=int(r_book['format_metadata'][f]['size'])
                     else:
                         print("Size not found for format '{}' : {}".format(f, uuid))
                         print("Trying to get size online: {}".format(s['url']))
@@ -355,6 +392,8 @@ def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False
 
                 source['formats']=format_sources
                 book['source']=source
+                print("Analyzed:", uuid)
+
 
                 if not source['formats']:
                     print("No format found for {}".format(r.json()[id]['uuid']))
@@ -422,7 +461,7 @@ def has_identifiers(book, identifiers=[], ignore_empty_identifiers=False):
     print ("'{}' todo: expected identifiers {}".format(book['uuid'], expected_identifiers))
     return True
 
-def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignored_formats=[], languages=[], identifiers=[], min_size=0, max_size=0, ignore_empty_language=False, ignore_empty_identifiers=False, dry_run=False):
+def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignored_formats=[], languages=[], identifiers=[], min_size=0, max_size=0, ignore_empty_language=False, ignore_empty_identifiers=False, dry_run=False, map="", map_lib=""):
     # all_ordered_formats=['azw', 'azw3', 'cbr', 'chm', 'djvu', 'doc', 'docx', 'epub', 'lit', 'lrf', 'mobi', 'original_epub', 'pdf', 'ppt', 'prc', 'rar', 'rtf', 'txt', 'zip']        
 
     if single_format: my_formats = formats if formats else all_ordered_formats 
@@ -446,15 +485,17 @@ def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignore
     identifiers_count={}
 
     s = requests.Session()
+
 
     for root, dirs, files in os.walk(dir, topdown=True):
-        for uuid in dirs:
+        for counter, uuid in enumerate(dirs):
             book = load_metadata(root, uuid)
             if book:
                 status=book['source']['status']
                 if status=="todo":
                     print()
-                    print("-->", uuid, "("+book['title']+")")
+                    print()
+                    print("-->", uuid, "("+book['title']+" -- "+book['authors'][0]+" -- serie: "+ str(book['series'])+")")
 
                     if not has_languages(book, languages=languages, ignore_empty_language=ignore_empty_language):
                         continue
@@ -466,32 +507,42 @@ def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignore
                     download_formats = get_formats_to_download(book, accepted_formats=my_formats, single_format=single_format, ignored_formats=ignored_formats, max_size=max_size, min_size=min_size)
                     if not len(download_formats):
                         print ("'{}' ignored: no more format available in formats expected {}".format(uuid, download_formats))
+                        print()
                     else:
                         ebook_kept=False
                         for f in download_formats:
                             url = source['formats'][f]['url']
+                            # if map:
+                            #     pu=urllib.parse.urlparse(url)
+                            #     pu=(pu[0], map, *pu[2:])
+                            #     print(pu)
+                            #     print(urllib.parse.urlunparse(pu))
                             if url:
+                                # # It shouldn't occur: Need to download again
                                 if get_file_path(dir, uuid, f):
-                                    print ("Format '{}' already present for {}: Skipped".format(f, uuid))
-                                    continue
+                                    print ("Format '{}' already present for {}: Retrying".format(f, uuid))
+                                    print()
+                                #     continue
 
                                 print("Format '{}': size expected={}".format(f, hsize(source['formats'][f]['size'])))
 
                                 if not dry_run:    
                                     try:
-                                        get_file(dir, book, f, s)
+                                        get_file(dir, book, f, s, map, map_lib)
                                         book['formats'].append(f)
-                                        time.sleep(0.5)
+                                        book['source']['formats'][f]['status']="done"
+                                        time.sleep(0)
                                     # except:
                                     except Exception as msg:
                                         print("Unable to get book:", url)
                                         print(msg)
+                                        time.sleep(5)
                                         continue
                                     save_metadata(dir, book)
 
                                 ebook_kept=True
                                 size=source['formats'][f]['size']
-                                total_size += size 
+                                total_size += size
                                 size_max = size if size>size_max else size_max
                                 if not size_min: 
                                     size_min = size
@@ -508,6 +559,7 @@ def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignore
                                 total_format_count +=1
                             else:    
                                 print ("Format '{}' ignored for {} ({}): No url)".format(f, uuid, book['title']))
+                                print()
                         if ebook_kept:
                             total_ebook_count+=1
                             if not book['languages']:
@@ -538,11 +590,15 @@ def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignore
                         if book['source']['status']=="done":
                             save_metadata(dir, book)
                             print("Book done:", book['uuid'])
+                            print()
                     # total_ebook_count+=1
                 else:
-                    print()
-                    print("-->", uuid, "("+book['title']+")")
-                    print ('{} in status "{}": skipped'.format(book['uuid'], status))
+                    # print()
+                    # print("-->", uuid, "("+book['title']+")")
+                    # print ('{} in status "{}": skipped'.format(book['uuid'], status))
+                    # print(f"--> {uuid} ({book['title']}) in status {status}: skipped", end="\r")
+                    # print(f"--> {uuid} ({book['title']})", end="\r")
+                    print(f'--> {counter} books handled', end="\r")
 
     print()
     print("Total count of updated ebooks:", total_ebook_count)
@@ -617,66 +673,6 @@ def update_format_statuses(book,refresh_ignored):
             print ("Format '{}' todo: {} ({}))".format(f, book['uuid'], book['title']))
             book['source']['formats'][f]['status']='todo'
 
-def filter_ebooks(dir= '.', server='', formats=[], single_format=False, ignored_formats=[], languages=[], identifiers=[], min_size=0, max_size=0, ignore_empty_language=False, ignore_empty_identifiers=False):
-
-    if single_format: my_formats = formats if formats else all_ordered_formats 
-    else: my_formats=formats
-    print("formats=", my_formats)
-
-    min_size=int(min_size)*1024*1024
-    max_size=int(max_size)*1024*1024
-    print ("Format expected between {} and {}".format(hsize(min_size), hsize(max_size) if max_size else "infinity"))
-
-    # sys.exit()
-
-    total_ebook_count=0
-    total_format_count=0
-
-    for root, dirs, files in os.walk(dir, topdown=True):
-        for uuid in dirs:
-            book = load_metadata(root, uuid)
-            if book:
-                status=book['source']['status']
-                if status=="todo":
-                    print()
-                    print("-->", uuid, "("+book['title']+")")
-
-                    if not has_languages(book, languages=languages, ignore_empty_language=ignore_empty_language):
-                        book['source']['status']='ignored'
-                        print ("{} ignored: languages filtered".format(uuid))
-                        save_metadata(dir, book)
-                        total_ebook_count+=1
-                        continue
-
-                    if not has_identifiers(book, identifiers=identifiers, ignore_empty_identifiers=ignore_empty_identifiers):
-                        book['source']['status']='ignored'
-                        print ("{} ignored: identifiers filtered".format(uuid))
-                        save_metadata(dir, book)
-                        total_ebook_count+=1
-                        continue
-
-                    download_formats = get_formats_to_download(book, accepted_formats=my_formats, single_format=single_format, ignored_formats=ignored_formats, max_size=max_size, min_size=min_size)
-
-                    save_ebook=False
-                    source=book['source']
-                    formats_to_ignore=list(set(source['formats'].keys()) - set(book['formats']) - set(download_formats))
-                    print("formats to ignore:", formats_to_ignore)
-                    for f in formats_to_ignore:
-                        if source['formats'][f]['status']!='ignored':
-                            source['formats'][f]['status']='ignored' 
-                            print ("{} format ignored: '{}'".format(uuid, f))
-                            total_format_count +=1
-                            save_ebook=True
-                    if save_ebook:
-                        save_metadata(dir, book)
-                else:
-                    print()
-                    print("-->", uuid, "("+book['title']+")")
-                    print ('{} in status "{}": skipped'.format(book['uuid'], status))
-
-    print()
-    print("Total count of newly ignored ebooks:", total_ebook_count)
-    print("Total count of newly formats to ignore:", total_format_count)
 
 def reset_ignored(dir= '.', server=''):
     for root, dirs, files in os.walk(dir, topdown=True):
@@ -701,6 +697,5 @@ def reset_ignored(dir= '.', server=''):
                     save_metadata(dir, book)
 
 
-
 if __name__ == "__main__":
     fire.Fire()
diff --git a/calisuck.py b/calisuck.py
@@ -10,6 +10,7 @@
 from langid.langid import LanguageIdentifier, model
 import iso639
 import pickle
+import time
 
 
 
@@ -18,7 +19,7 @@
 
 def load_metadata(path, uuid):
     filepath=path+'/'+uuid+'/metadata.json'
-    print (filepath)
+    # print (filepath)
     if os.path.isfile(filepath):
         try:
             with open(filepath, 'r') as fd:
@@ -62,7 +63,7 @@ def get_cover(path, book):
     url=book['source']['cover']
     print("Downloading cover from:", url)
 
-    r=requests.get(url)
+    r=requests.get(url, timeout=10)
     r.raise_for_status()
 
     filepath=path+'/'+book['uuid']+'/cover.jpg'
@@ -76,125 +77,26 @@ def get_cover(path, book):
 def download_covers(dir= '.', server=''):
     for root, dirs, files in os.walk(dir, topdown=True):
         for d in dirs:
-            print() 
-            print("-->", d) 
+            # print() 
+            # print("-->", d) 
             book = load_metadata(root, d)
             if book:
                 if book['source']['status'] != "ignored":
                     if not get_cover_path(root, book['uuid']): 
+                        print() 
+                        print("-->", d) 
                         print(book['uuid'])
                         try:
                             get_cover(root, book)
                         except:
                             print ("Unable to get cover", book['uuid'])   
                     else:
-                        print ("Cover already present:", book['uuid'])
-                else:
-                    print ('book {} in status {}: ignored'.format(book['uuid'], book['source']['status']))
-            else:
-                print ("No ebook metadata found in:", root)
-
-def index_ebooks1(dir= '.', server=''):
-    filepath=dir+'/.index'
-
-    if os.path.isfile(filepath):
-        index=pickle.load(open(filepath, 'rb'))
-    else:
-        index = {}
-        index['uuids']=[]
-        index['identifiers']={}
-        index['authors']={}
-        index['titles']={}   
-
-    for root, dirs, files in os.walk(dir, topdown=True):
-
-        for d in dirs:
-            print() 
-            print("-->", d) 
-            book = load_metadata(root, d)
-            if book:
-                if book['source']['status'] == "todo":
-                    print(book['uuid'])
-                    if not book['uuid'] in index['uuids']:
-                        index['uuids'].append(book['uuid']) 
-                    if book['title'] not in index['titles']:
-                        index['titles'][book['title']] = [book['uuid']]
-                    elif not book['uuid'] in index['titles'][book['title']]:
-                        index['titles'][book['title']].append(book['uuid'])
-                    # index['titles'][book['title']] = index['titles'].get(book['title'], []) + [book['uuid']]
-                    for a in book["authors"]:
-                        if a not in index['authors']:
-                            index['authors'][a] = [book['uuid']]
-                        elif not book['uuid'] in index['authors'][a]:
-                            index['authors'][a].append(book['uuid'])
-                        # index['authors'][a] = index['authors'].get(a, []) + [book['uuid']] 
-                    for k, i in book["identifiers"].items():
-                        if k not in index['identifiers']:
-                            index['identifiers'][k]={i:[book['uuid']]}
-                        elif i not in index['identifiers'][k]:    
-                            index['identifiers'][k][i]=[book['uuid']]
-                        elif not book['uuid'] in index['identifiers'][k][i]:
-                            index['identifiers'][k][i].append(book['uuid'])
-                        # index['identifiers'][k][i]=index['identifiers'].get(k, {}).get(i, []) + [book['uuid']] 
+                        pass
+                        # print ("Cover already present:", book['uuid'])
                 else:
                     print ('book {} in status {}: ignored'.format(book['uuid'], book['source']['status']))
             else:
                 print ("No ebook metadata found in:", root)
-    print("titles indexed:", len(index['titles'])) 
-    print("authors indexed:", len(index['authors']))
-    print("identifiers indexed:", len(index['identifiers']))  
-    # print("identifiers:",index['identifiers'])  
-    pickle.dump(index, open(filepath, 'wb'))
-
-def index_calibre_local(dir= '.', calibre_dir=''):
-    filepath=dir+'/.index'
-
-    if os.path.isfile(filepath):
-        index=pickle.load(open(filepath, 'rb'))
-    else:
-        index = {}
-        index['identifiers']={}
-        index['authors']={}
-        index['titles']={}   
-
-    for root, dirs, files in os.walk(dir, topdown=True):
-
-        for d in dirs:
-            print() 
-            print("-->", d) 
-            book = load_metadata(root, d)
-            if book:
-                if book['source']['status'] == "todo":
-                    print(book['uuid'])
-                    if book['title'] not in index['titles']:
-                        index['titles'][book['title']] = [book['uuid']]
-                    elif not book['uuid'] in index['titles'][book['title']]:
-                        index['titles'][book['title']].append(book['uuid'])
-                    # index['titles'][book['title']] = index['titles'].get(book['title'], []) + [book['uuid']]
-                    for a in book["authors"]:
-                        if a not in index['authors']:
-                            index['authors'][a] = [book['uuid']]
-                        elif not book['uuid'] in index['authors'][a]:
-                            index['authors'][a].append(book['uuid'])
-                        # index['authors'][a] = index['authors'].get(a, []) + [book['uuid']] 
-                    for k, i in book["identifiers"].items():
-                        if k not in index['identifiers']:
-                            index['identifiers'][k]={i:[book['uuid']]}
-                        elif i not in index['identifiers'][k]:    
-                            index['identifiers'][k][i]=[book['uuid']]
-                        elif not book['uuid'] in index['identifiers'][k][i]:
-                            index['identifiers'][k][i].append(book['uuid'])
-                        # index['identifiers'][k][i]=index['identifiers'].get(k, {}).get(i, []) + [book['uuid']] 
-                else:
-                    print ('book {} in status {}: ignored'.format(book['uuid'], book['source']['status']))
-            else:
-                print ("No ebook metadata found in:", root)
-    print("titles indexed:", len(index['titles'])) 
-    print("authors indexed:", len(index['authors']))
-    print("identifiers indexed:", len(index['identifiers']))  
-    # print("identifiers:",index['identifiers'])  
-    pickle.dump(index, open(filepath, 'wb'))
-
 
 def get_file_size(url):
     print("Downloading size:", url)
@@ -204,13 +106,13 @@ def get_file_size(url):
     print("Size received="+ hsize(size))
     return int(size)
 
-def get_file(path, book, format):
+def get_file(path, book, format, session):
     uuid = book['uuid']
     url=book['source']['formats'][format]['url']
 
     print("Downloading ebook:", url)
     print("Size expected (estimation):", hsize(book['source']['formats'][format]['size']))
-    r = requests.get(url)
+    r = session.get(url, timeout=5)
     # headers = {"Range": "bytes=0-1023"}
     # r = requests.get(url, headers=headers)
     r.raise_for_status()
@@ -314,7 +216,7 @@ def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False
     print("Getting ebooks count:", server)
     print(url)
     try:
-        r = requests.get(url)
+        r = requests.get(url,verify=False)
         r.raise_for_status()
     except:
         print("Unable to open site:", url)
@@ -340,7 +242,7 @@ def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False
         books_s=",".join(str(i) for i in r.json()['book_ids'])
         url=api+'books'+library+'?ids='+books_s
         print("->", url)
-        r=requests.get(url)
+        r=requests.get(url, verify=False)
         print(len(r.json()), "received")
 
         for id in r.json().keys():                
@@ -389,20 +291,20 @@ def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False
                 book['publisher']=r.json()[id]['publisher']
                 languages=r.json()[id]['languages']
                 if not languages:
-                    pass
-                    # print ("Analyzing languages")
-                    # if book['comments']:
-                    #     text=book['comments']
-                    # else:
-                    #     text=book['title']
-                    # s_language, prob=identifier.classify(text)
-                    # print (s_language, prob)
-                    # if prob >= 0.85:
-                    #     language =  iso639.to_iso639_2(s_language)
-                    #     print("language=", language)
-                    #     book['languages']=[language]
-                    # else:
-                    #     book['languages']=[]
+                    # pass
+                    print ("Analyzing languages")
+                    if book['comments']:
+                        text=book['comments']
+                    else:
+                        text=book['title']
+                    s_language, prob=identifier.classify(text)
+                    print (s_language, prob)
+                    if prob >= 0.85:
+                        language =  iso639.to_iso639_2(s_language)
+                        print("language=", language)
+                        book['languages']=[language]
+                    else:
+                        book['languages']=[]
                 else:
                     book['languages']=[]
                     for l in languages:
@@ -543,6 +445,8 @@ def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignore
     language_count={}
     identifiers_count={}
 
+    s = requests.Session()
+
     for root, dirs, files in os.walk(dir, topdown=True):
         for uuid in dirs:
             book = load_metadata(root, uuid)
@@ -575,8 +479,9 @@ def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignore
 
                                 if not dry_run:    
                                     try:
-                                        get_file(dir, book, f)
+                                        get_file(dir, book, f, s)
                                         book['formats'].append(f)
+                                        time.sleep(0.5)
                                     # except:
                                     except Exception as msg:
                                         print("Unable to get book:", url)
@@ -663,7 +568,6 @@ def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignore
 
 def get_formats_to_download(book, accepted_formats=[], ignored_formats=[], single_format=False, min_size=0, max_size=0):
     print("Accepted formats", accepted_formats)
-
     source=book['source']
     print("Formats available in source: {}".format(list(source['formats'].keys())))
     my_formats=[]

diff --git a/calisuck.py b/calisuck.py
@@ -763,7 +763,7 @@ def filter_ebooks(dir= '.', server='', formats=[], single_format=False, ignored_
                             print ("{} format ignored: '{}'".format(uuid, f))
                             total_format_count +=1
                             save_ebook=True
-                    if save_ebook:æ
+                    if save_ebook:
                         save_metadata(dir, book)
                 else:
                     print()

diff --git a/.py → calisuck.py b/.py → calisuck.py
diff --git a/.py b/.py
@@ -0,0 +1,802 @@
+import sys
+import os
+import time
+import re
+import shutil
+import requests
+import json
+import fire
+from humanize import naturalsize as hsize
+from langid.langid import LanguageIdentifier, model
+import iso639
+import pickle
+
+
+
+# Ebook formats known to this tool. download_ebooks and filter_ebooks fall back to this
+# list when single_format is requested without an explicit `formats` argument, and
+# get_formats_to_download then keeps the first entry of the list that the book offers.
+all_ordered_formats=['azw', 'azw3', 'cbr', 'chm', 'djvu', 'doc', 'docx', 'epub', 'kepub', 'lit', 'lrf', 'mobi', 'original_epub', 'pdf', 'ppt', 'prc', 'rar', 'rtf', 'txt', 'zip', 'fb2']        
+# Module-wide langid classifier, built once at import time.
+# NOTE(review): norm_probs=True presumably makes classify() return a probability in [0, 1] - confirm against langid.
+identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
+
+def load_metadata(path, uuid):
+    """Load the metadata of one book from <path>/<uuid>/metadata.json.
+
+    Returns the decoded JSON document, or 0 when the file is missing or
+    cannot be read or parsed (callers only test the result for truthiness).
+    """
+    filepath=path+'/'+uuid+'/metadata.json'
+    print (filepath)
+    if not os.path.isfile(filepath):
+        print ("Metadata not found for:", uuid, "from path:", path)
+        return 0
+    try:
+        with open(filepath, 'r') as fd:
+            return json.load(fd)
+    except (OSError, ValueError):
+        # OSError: the file cannot be read. ValueError: invalid JSON or
+        # undecodable bytes (JSONDecodeError and UnicodeDecodeError derive
+        # from it). A bare except would also trap KeyboardInterrupt/SystemExit.
+        print ("Error loading metadata for:", uuid, "from path:", path)
+        return 0
+
+def save_metadata(path, book):
+    """Write `book` as indented JSON to <path>/<book['uuid']>/metadata.json.
+
+    The document is first written to a ".tmp" sibling and then moved over the
+    final name, so an interrupted write does not leave a truncated
+    metadata.json. A failed move is reported on stdout, not raised.
+    """
+    filepath=path+'/'+book['uuid']+'/metadata.json'
+    tmppath=filepath+".tmp"
+    print("Saving book metadata for:", book['uuid'], "to:", filepath)
+    os.makedirs(os.path.dirname(tmppath), exist_ok=True)
+    with open(tmppath, 'w') as fd:
+        json.dump(book, fd, indent=4, separators=(',', ': '))
+    try:
+        shutil.move(tmppath, filepath)
+    except OSError:
+        # shutil.Error derives from OSError; anything else (e.g. Ctrl-C)
+        # must propagate instead of being reported as a rename problem.
+        print("Unable to rename .tmp file:", tmppath)
+    else:
+        print("Saved to:", filepath)
+
+
+def get_cover_path(path, uuid):
+    """Return <path>/<uuid>/cover.jpg when that file exists, otherwise 0."""
+    cover='/'.join((path, uuid, 'cover.jpg'))
+    if not os.path.isfile(cover):
+        return 0
+    return cover
+
+def get_file_path(path, uuid, fileformat):
+    """Return the path of the first file in <path>/<uuid> whose extension is
+    `fileformat`, or 0 when the directory holds no such file.
+    """
+    bookdir=path+'/'+uuid
+    wanted='.'+fileformat
+    for name in os.listdir(bookdir):
+        if os.path.splitext(name)[1] == wanted:
+            return bookdir+'/'+name
+    return 0
+
+def get_cover(path, book):
+    """Download the cover at book['source']['cover'] to <path>/<uuid>/cover.jpg.
+
+    The image is written to a ".tmp" file first and moved to its final name
+    afterwards. Connection problems and HTTP error statuses are raised by
+    requests (raise_for_status) and left to the caller.
+    """
+    url=book['source']['cover']
+    print("Downloading cover from:", url)
+
+    r=requests.get(url)
+    r.raise_for_status()
+
+    filepath=path+'/'+book['uuid']+'/cover.jpg'
+    tmppath=filepath+".tmp"
+    os.makedirs(os.path.dirname(tmppath), exist_ok=True)
+    with open(tmppath, 'wb') as fd:
+        fd.write(r.content)
+    # Move only after the temporary file has been closed: renaming a file
+    # that is still open fails on Windows.
+    shutil.move(tmppath, filepath)
+    print("Saved to:", filepath)
+
+
+def download_covers(dir= '.', server=''):
+    """Download the missing cover of every non-ignored book found under `dir`.
+
+    Every directory met while walking `dir` is treated as a book directory.
+    A failed download is reported and the walk continues with the next book.
+    `server` is accepted but not used by this function.
+    """
+    for root, dirs, files in os.walk(dir, topdown=True):
+        for d in dirs:
+            print()
+            print("-->", d)
+            book = load_metadata(root, d)
+            if not book:
+                print ("No ebook metadata found in:", root)
+                continue
+            if book['source']['status'] == "ignored":
+                print ('book {} in status {}: ignored'.format(book['uuid'], book['source']['status']))
+                continue
+            if get_cover_path(root, book['uuid']):
+                print ("Cover already present:", book['uuid'])
+                continue
+            print(book['uuid'])
+            try:
+                get_cover(root, book)
+            except Exception as msg:
+                # Exception (not a bare except) so that Ctrl-C still stops
+                # the walk; the reason is printed as download_ebooks does.
+                print ("Unable to get cover", book['uuid'])
+                print(msg)
+
+def index_ebooks1(dir= '.', server=''):
+    """Build or update the pickled lookup index stored in <dir>/.index.
+
+    The index maps book uuids by title, author and identifier:
+        index['uuids']                    -> [uuid, ...]
+        index['titles'][title]            -> [uuid, ...]
+        index['authors'][author]          -> [uuid, ...]
+        index['identifiers'][kind][value] -> [uuid, ...]
+    Only books whose source status is "todo" are indexed. `server` is
+    accepted but not used by this function.
+    """
+    filepath=dir+'/.index'
+
+    if os.path.isfile(filepath):
+        # NOTE: unpickling can run arbitrary code; only load index files
+        # that this tool wrote itself.
+        with open(filepath, 'rb') as fd:
+            index=pickle.load(fd)
+    else:
+        index = {}
+    # index_calibre_local writes the same file without a 'uuids' section,
+    # so make sure every section exists before using it.
+    index.setdefault('uuids', [])
+    index.setdefault('identifiers', {})
+    index.setdefault('authors', {})
+    index.setdefault('titles', {})
+
+    for root, dirs, files in os.walk(dir, topdown=True):
+
+        for d in dirs:
+            print()
+            print("-->", d)
+            book = load_metadata(root, d)
+            if not book:
+                print ("No ebook metadata found in:", root)
+                continue
+            if book['source']['status'] != "todo":
+                print ('book {} in status {}: ignored'.format(book['uuid'], book['source']['status']))
+                continue
+
+            uuid=book['uuid']
+            print(uuid)
+            if uuid not in index['uuids']:
+                index['uuids'].append(uuid)
+
+            title_uuids=index['titles'].setdefault(book['title'], [])
+            if uuid not in title_uuids:
+                title_uuids.append(uuid)
+
+            for a in book["authors"]:
+                author_uuids=index['authors'].setdefault(a, [])
+                if uuid not in author_uuids:
+                    author_uuids.append(uuid)
+
+            for k, i in book["identifiers"].items():
+                identifier_uuids=index['identifiers'].setdefault(k, {}).setdefault(i, [])
+                if uuid not in identifier_uuids:
+                    identifier_uuids.append(uuid)
+
+    print("titles indexed:", len(index['titles']))
+    print("authors indexed:", len(index['authors']))
+    print("identifiers indexed:", len(index['identifiers']))
+    # Close the file explicitly so the index is flushed before returning.
+    with open(filepath, 'wb') as fd:
+        pickle.dump(index, fd)
+
+def index_calibre_local(dir= '.', calibre_dir=''):
+    """Build or update the pickled title/author/identifier index in <dir>/.index.
+
+    The index maps book uuids:
+        index['titles'][title]            -> [uuid, ...]
+        index['authors'][author]          -> [uuid, ...]
+        index['identifiers'][kind][value] -> [uuid, ...]
+    Only books whose source status is "todo" are indexed. `calibre_dir` is
+    accepted but not used by this function.
+    """
+    filepath=dir+'/.index'
+
+    if os.path.isfile(filepath):
+        # NOTE: unpickling can run arbitrary code; only load index files
+        # that this tool wrote itself.
+        with open(filepath, 'rb') as fd:
+            index=pickle.load(fd)
+    else:
+        index = {}
+    # Create whichever section is missing (new or partial index).
+    index.setdefault('identifiers', {})
+    index.setdefault('authors', {})
+    index.setdefault('titles', {})
+
+    for root, dirs, files in os.walk(dir, topdown=True):
+
+        for d in dirs:
+            print()
+            print("-->", d)
+            book = load_metadata(root, d)
+            if not book:
+                print ("No ebook metadata found in:", root)
+                continue
+            if book['source']['status'] != "todo":
+                print ('book {} in status {}: ignored'.format(book['uuid'], book['source']['status']))
+                continue
+
+            uuid=book['uuid']
+            print(uuid)
+
+            title_uuids=index['titles'].setdefault(book['title'], [])
+            if uuid not in title_uuids:
+                title_uuids.append(uuid)
+
+            for a in book["authors"]:
+                author_uuids=index['authors'].setdefault(a, [])
+                if uuid not in author_uuids:
+                    author_uuids.append(uuid)
+
+            for k, i in book["identifiers"].items():
+                identifier_uuids=index['identifiers'].setdefault(k, {}).setdefault(i, [])
+                if uuid not in identifier_uuids:
+                    identifier_uuids.append(uuid)
+
+    print("titles indexed:", len(index['titles']))
+    print("authors indexed:", len(index['authors']))
+    print("identifiers indexed:", len(index['identifiers']))
+    # Close the file explicitly so the index is flushed before returning.
+    with open(filepath, 'wb') as fd:
+        pickle.dump(index, fd)
+
+
+def get_file_size(url):
+    """Issue a HEAD request for `url` and return its Content-Length as an int.
+
+    HTTP error statuses are raised by raise_for_status(); a response without
+    a Content-Length header raises KeyError.
+    """
+    print("Downloading size:", url)
+    response = requests.head(url)
+    response.raise_for_status()
+    content_length=response.headers['Content-Length']
+    print("Size received="+ hsize(content_length))
+    return int(content_length)
+
+def get_file(path, book, format):
+    """Download one format of `book` into <path>/<uuid>/.
+
+    The file is named after the Content-Disposition filename sent by the
+    server when there is a usable one, otherwise "<uuid>.<format>". The
+    content is written to a ".tmp" file first and moved to the final name
+    afterwards. Connection problems and HTTP error statuses are raised by
+    requests and left to the caller.
+    """
+    uuid = book['uuid']
+    url=book['source']['formats'][format]['url']
+
+    print("Downloading ebook:", url)
+    print("Size expected (estimation):", hsize(book['source']['formats'][format]['size']))
+    r = requests.get(url)
+    r.raise_for_status()
+    if 'Content-Length' in r.headers:
+        print("Size received="+hsize(r.headers['Content-Length']))
+    else:
+        print("Size received: unknown")
+
+    # Content-Disposition is optional: a missing header must fall back to the
+    # generated name instead of raising KeyError.
+    filename=re.findall(r'filename="([^"]+)"', r.headers.get('Content-Disposition', ''))
+    # The name is chosen by the remote server: keep only its last path
+    # component so it cannot point outside the book directory.
+    name=os.path.basename(filename[0].replace('\\', '/')) if filename else ''
+    if name in ('', '.', '..'):
+        name=uuid+"."+format
+    filepath=path+'/'+uuid+'/'+name
+    tmppath=filepath+".tmp"
+
+    os.makedirs(os.path.dirname(tmppath), exist_ok=True)
+    with open(tmppath, 'wb') as fd:
+        fd.write(r.content)
+    # Move only after the temporary file has been closed: renaming a file
+    # that is still open fails on Windows.
+    shutil.move(tmppath, filepath)
+    print("Saved to:", filepath)
+
+
+def set_status(uuid, status, dir='.'):
+    """Set the source status of book `uuid` under `dir` to `status`.
+
+    The metadata is saved only when the status actually changes; each
+    outcome is reported on stdout.
+    """
+    book = load_metadata(dir, uuid)
+    if not book:
+        print ("No ebook metadata found for:", uuid)
+        return
+
+    source=book['source']
+    if source['status'] == status:
+        print("Status unchanged changed ", status+":", book['uuid'])
+        return
+
+    source['status'] = status
+    save_metadata(dir, book)
+    print("Status changed to", status+":", book['uuid'])
+
+
+
+def remove_book(uuid, path='.'):
+    """Delete the directory <path>/<uuid> and everything inside it.
+
+    A missing directory or a failed removal is reported on stdout, not raised.
+    """
+    print(os.getcwd())
+    bookpath=path+'/'+uuid
+    if not os.path.isdir(bookpath):
+        print(uuid, "not found")
+        return
+    try:
+        shutil.rmtree(bookpath)
+    except OSError as err:
+        # Only filesystem errors are expected here; report the reason rather
+        # than hiding it (a bare except would also swallow Ctrl-C).
+        print("problem", err)
+    else:
+        print(uuid, "removed")
+
+
+
+def explore(site, help=False):
+    """List the libraries exposed by a Calibre server and their ebook counts.
+
+    `site` is concatenated directly with 'ajax/', so it is expected to end
+    with a slash. `help` is accepted but not used by this function.
+    Exits the process with status 1 when the library list cannot be fetched;
+    a library whose count cannot be fetched is reported and skipped.
+    """
+    server=site
+    api=server+'ajax/'
+    print("Server:", server)
+    url=api+'library-info'
+    print()
+    print("Getting libraries:", server)
+    print(url)
+    try:
+        r = requests.get(url)
+        r.raise_for_status()
+    except requests.RequestException:
+        print("Unable to open site:", url)
+        sys.exit(1)
+
+    libraries = r.json()["library_map"].keys()
+    print("Libraries:")
+    for l in libraries:
+        url=api+'search/'+l+'?num=0'
+        try:
+            r = requests.get(url)
+            r.raise_for_status()
+        except requests.RequestException:
+            # Network/HTTP failures only, so that Ctrl-C still interrupts.
+            print("Unable to open site:", url)
+            continue
+        print("\t{}: {} ebooks".format(l, r.json()["total_num"]))
+
+
+def update_done_status(book):
+    """Recompute book['source']['status'] from the formats already downloaded.
+
+    Books in status 'ignored' are left untouched. Otherwise the status becomes
+    "done" when every format listed in the source is present in
+    book['formats'], and "todo" if at least one is still missing.
+    """
+    source=book['source']
+    if source['status']=='ignored':
+        return
+    downloaded=set(book['formats'])
+    everything_downloaded=all(f in downloaded for f in source['formats'])
+    source['status']="done" if everything_downloaded else "todo"
+
+
+def index_ebooks(site, library="", start=0, stop=0, dir=".", force_refresh=False):
+    """Fetch book metadata from a Calibre server and save it under `dir`.
+
+    Book ids are requested in pages of 500 (sorted by timestamp, newest
+    first), the metadata of each page is fetched in one request, and one
+    metadata.json per book is written through save_metadata().
+
+    site          -- server base URL; 'ajax/' is appended to it as is
+    library       -- optional library name, inserted as '/<library>' in the URLs
+    start         -- 1-based position of the first book to index (0: from the beginning)
+    stop          -- when non-zero, used in place of the total count reported by the server
+    dir           -- directory receiving one <uuid>/metadata.json per book
+    force_refresh -- when False, books that already have local metadata are skipped
+
+    Exits the process with status 1 when the initial count request fails.
+    """
+    offset= 0 if not start else start-1
+    # Page size: number of book ids requested per search call.
+    num=500
+    server=site
+    api=server+'ajax/'
+    #api=server+'calibre/ajax/'
+    library= '/'+library if library else library 
+
+    print("Server:", server)
+    # num=0 returns no ids; the response is only read for "total_num".
+    url=api+'search'+library+'?num=0'
+    print()
+    print("Getting ebooks count:", server)
+    print(url)
+    try:
+        r = requests.get(url)
+        r.raise_for_status()
+    except:
+        print("Unable to open site:", url)
+        sys.exit(1)
+    print("Total count=",r.json()["total_num"])
+    total_num=int(r.json()["total_num"])
+    total_num= total_num if not stop else stop
+
+    # Progress counter shown as "range=<n>/<total>"; note that this local
+    # name shadows the builtin range() inside the function.
+    range=offset+1
+    while offset < total_num:
+        remaining_num = min(num, total_num - offset)
+        print()
+        print("Downloading ids: offset="+str(offset), "num="+str(remaining_num))
+        # url=api+'search?num='+str(remaining_num)+'&offset='+str(offset)
+        url=api+'search'+library+'?num='+str(remaining_num)+'&offset='+str(offset)+'&sort=timestamp&sort_order=desc'
+
+        print("->", url)
+        r=requests.get(url)
+        print("Ids received from:"+str(offset), "to:"+str(offset+remaining_num-1))
+
+        print()
+        print("Downloading metadata from", str(offset+1), "to", str(offset+remaining_num))
+        # One request for the metadata of every id of the page.
+        books_s=",".join(str(i) for i in r.json()['book_ids'])
+        url=api+'books'+library+'?ids='+books_s
+        print("->", url)
+        # From here on `r` is the metadata response, keyed by book id.
+        r=requests.get(url)
+        print(len(r.json()), "received")
+
+        for id in r.json().keys():                
+                print()
+                print ('--> range={}/{}'.format(str(range),str(total_num)))
+                uuid=r.json()[id]['uuid']
+                if not uuid:
+                    # NOTE(review): unlike the other skip paths, the progress counter is not incremented here.
+                    print ("No uuid for ebook: ignored")
+                    continue 
+                # print ('\r--> range={}/{}'.format(str(range),str(total_num)), "uuid="+uuid, "("+r.json()[id]['title']+")", end='')
+                # print (r.json()[id])
+                # title= r.json()[id]['title'] if 'title' in r.json()[id] else "<untitled>"
+                print("uuid="+uuid, "("+r.json()[id]['title']+")")
+
+                if not force_refresh:
+                    # Skip books that already have local metadata.
+                    # print("Checking local metadata:", uuid)
+                    try:
+                        book = load_metadata(dir, uuid)
+                    except:
+                        print("Unable to get metadata from:", uuid)
+                        range+=1
+                        continue
+                    if book:
+                        print("Metadata already present for:", uuid)
+                        range+=1
+                        continue
+
+                if not r.json()[id]['formats']:
+                    print("No format found for {}".format(r.json()[id]['uuid']))
+                    range+=1
+                    continue
+
+
+                # Build the local metadata document from the server record.
+                book={}
+                url=api+'book/'+id
+                book['title']=r.json()[id]['title']
+                book['authors']=r.json()[id]['authors']
+                book['series']=r.json()[id]['series']
+                book['series']=r.json()[id]['series']
+                book['series_index']=r.json()[id]['series_index']
+                book['edition']=0
+                book['uuid']=r.json()[id]['uuid']
+                book['identifiers']=r.json()[id]['identifiers']
+                book['comments']=r.json()[id]['comments']
+                book['pubdate']=r.json()[id]['pubdate']
+                book['publisher']=r.json()[id]['publisher']
+                languages=r.json()[id]['languages']
+                if not languages:
+                    # Language detection is disabled: in this case the book is
+                    # saved without any 'languages' key.
+                    pass
+                    # print ("Analyzing languages")
+                    # if book['comments']:
+                    #     text=book['comments']
+                    # else:
+                    #     text=book['title']
+                    # s_language, prob=identifier.classify(text)
+                    # print (s_language, prob)
+                    # if prob >= 0.85:
+                    #     language =  iso639.to_iso639_2(s_language)
+                    #     print("language=", language)
+                    #     book['languages']=[language]
+                    # else:
+                    #     book['languages']=[]
+                else:
+                    # Store the server's language codes converted with iso639.to_iso639_2.
+                    book['languages']=[]
+                    for l in languages:
+                        book['languages'].append(iso639.to_iso639_2(l))
+
+                book['tags']=r.json()[id]['tags']
+                # Formats downloaded so far: none for a freshly indexed book.
+                book['formats']=[]
+                book['metadata_version']=0.1
+                source={}
+                source['url']=url
+                source['id']=id
+                # Keep an "ignored" status set on a previous run (relevant with force_refresh).
+                try:
+                    tmpbook = load_metadata(dir, uuid)
+                except:
+                    print("Unable to get metadata from:", uuid)
+                    range+=1
+                    continue
+                if tmpbook and tmpbook['source']['status']=="ignored":
+                    source['status']="ignored"
+                else:
+                    source['status']="todo"
+                source['cover']=server+r.json()[id]['cover']  
+                source['timestamp']=r.json()[id]['timestamp']
+
+                # One entry per available format: download url, size and status.
+                format_sources={}
+                formats=r.json()[id]['formats']
+                for f in formats:
+                    s={}    
+                    url=''
+                    if f in r.json()[id]['main_format']:
+                        url=r.json()[id]['main_format'][f]
+                    else:
+                        url=r.json()[id]['other_formats'][f]
+                    s['url']=server+url
+
+                    if 'size' in r.json()[id]['format_metadata'][f]:
+                        s['size']=int(r.json()[id]['format_metadata'][f]['size'])
+                    else:
+                        # Size missing from the metadata: ask the server with a
+                        # HEAD request, and drop the format if that fails too.
+                        print("Size not found for format '{}' : {}".format(f, uuid))
+                        print("Trying to get size online: {}".format(s['url']))
+                        try:
+                            s['size']=get_file_size(s['url'])
+                        except:
+                            print("Unable to access format '{}' : {} skipped".format(f, uuid))
+                            continue
+                    s['status']='todo'
+                    format_sources[f]=s
+
+                source['formats']=format_sources
+                book['source']=source
+
+                if not source['formats']:
+                    # Every format was dropped above: nothing to download for this book.
+                    print("No format found for {}".format(r.json()[id]['uuid']))
+                    range+=1
+                    continue
+                update_done_status(book)
+                print("Saving metadata for:", uuid)
+                try:
+                    save_metadata(dir, book)
+                except:
+                    print("Unable to save book metadata", book['uuid'])
+                range+=1
+        # Next page.
+        offset=offset+num
+
+
+
+def has_languages(book, languages=[], ignore_empty_language=False):
+    """Tell whether `book` passes the language filter.
+
+    A book without any language is accepted unless `ignore_empty_language`
+    is set. Otherwise it is accepted when `languages` is empty or shares at
+    least one entry with the book's languages. A missing 'languages' key is
+    added to `book` as an empty list.
+    """
+
+    print("Accepted languages", languages)
+    if not ignore_empty_language:
+            print("Unknown language accepted")
+
+    # patch for metadata saved without a 'languages' key
+    if 'languages' not in book:
+        book['languages']=[]
+    book_languages=book['languages']
+
+    print("Book languages", book_languages)
+
+    if not book_languages:
+        if ignore_empty_language:
+            print ("'{}' ignored: language is empty".format(book['uuid']))
+            return False
+        print ("'{}' todo: language is empty".format(book['uuid']))
+        return True
+
+    expected_languages=list(set(book_languages) & set(languages))
+    if not expected_languages and languages:
+        print ("'{}' ignored: language {} not in {}".format(book['uuid'], book_languages,languages))
+        return False
+
+    print ("'{}' todo: expected languages {}".format(book['uuid'], expected_languages))
+    return True
+
+def has_identifiers(book, identifiers=[], ignore_empty_identifiers=False):
+    """Tell whether `book` passes the identifier filter.
+
+    A book without any identifier is accepted unless
+    `ignore_empty_identifiers` is set. Otherwise it is accepted when
+    `identifiers` is empty or names at least one identifier kind (a key of
+    book['identifiers']) that the book has.
+    """
+
+    print("Accepted identifiers", identifiers)
+    if not ignore_empty_identifiers:
+            print("Unknown identifiers accepted")
+    book_identifiers=book['identifiers']
+    print("Book identifiers", book_identifiers.keys())
+
+    if not book_identifiers:
+        if ignore_empty_identifiers:
+            print ("'{}' ignored: identifier is empty".format(book['uuid']))
+            return False
+        print ("'{}' todo: identifiers is empty".format(book['uuid']))
+        return True
+
+    expected_identifiers=list(set(book_identifiers.keys()) & set(identifiers))
+    if not expected_identifiers and identifiers:
+        print ("'{}' ignored: identifiers {} not in {}".format(book['uuid'], book_identifiers.keys(), identifiers))
+        return False
+
+    print ("'{}' todo: expected identifiers {}".format(book['uuid'], expected_identifiers))
+    return True
+
+def download_ebooks(dir= '.', server='', formats=[], single_format=False, ignored_formats=[], languages=[], identifiers=[], min_size=0, max_size=0, ignore_empty_language=False, ignore_empty_identifiers=False, dry_run=False):
+    """Download the selected formats of every "todo" book found under `dir`.
+
+    Books are filtered by language (has_languages) and identifier
+    (has_identifiers), the formats to fetch are chosen by
+    get_formats_to_download, and a summary (counts and sizes, overall and per
+    format/language/identifier) is printed at the end.
+
+    formats / ignored_formats -- formats to accept / to leave out
+    single_format             -- fetch only one format per book; without `formats`,
+                                 all_ordered_formats gives the preference order
+    min_size / max_size       -- size limits; multiplied by 1024*1024 before use
+                                 (max_size 0 means no upper limit)
+    dry_run                   -- do not download or save anything, only compute
+                                 the statistics
+    `server` is accepted but not used by this function.
+    """
+    # all_ordered_formats=['azw', 'azw3', 'cbr', 'chm', 'djvu', 'doc', 'docx', 'epub', 'lit', 'lrf', 'mobi', 'original_epub', 'pdf', 'ppt', 'prc', 'rar', 'rtf', 'txt', 'zip']        
+
+    if single_format: my_formats = formats if formats else all_ordered_formats 
+    else: my_formats=formats
+    print("formats=", my_formats)
+
+    min_size=int(min_size)*1024*1024
+    max_size=int(max_size)*1024*1024
+    print ("Format expected between {} and {}".format(hsize(min_size), hsize(max_size) if max_size else "infinity"))
+
+    # sys.exit()
+
+    # Statistics accumulated over the walk and printed at the end.
+    total_size=0
+    total_size_by_format={}
+    total_ebook_count=0
+    total_format_count=0
+    total_count_by_format={}
+    size_max=0
+    # 0 means "no size seen yet" (see the size_min update below).
+    size_min=0
+    language_count={}
+    identifiers_count={}
+
+    for root, dirs, files in os.walk(dir, topdown=True):
+        for uuid in dirs:
+            book = load_metadata(root, uuid)
+            if book:
+                status=book['source']['status']
+                if status=="todo":
+                    print()
+                    print("-->", uuid, "("+book['title']+")")
+
+                    if not has_languages(book, languages=languages, ignore_empty_language=ignore_empty_language):
+                        continue
+
+                    if not has_identifiers(book, identifiers=identifiers, ignore_empty_identifiers=ignore_empty_identifiers):
+                        continue
+
+                    source=book['source']
+                    download_formats = get_formats_to_download(book, accepted_formats=my_formats, single_format=single_format, ignored_formats=ignored_formats, max_size=max_size, min_size=min_size)
+                    if not len(download_formats):
+                        print ("'{}' ignored: no more format available in formats expected {}".format(uuid, download_formats))
+                    else:
+                        # True as soon as one format of this book is downloaded (or would be, in dry_run).
+                        ebook_kept=False
+                        for f in download_formats:
+                            url = source['formats'][f]['url']
+                            if url:
+                                # NOTE(review): metadata is loaded from `root` but files are looked up, downloaded
+                                # and saved under `dir`; the two differ for nested directories - confirm intent.
+                                if get_file_path(dir, uuid, f):
+                                    print ("Format '{}' already present for {}: Skipped".format(f, uuid))
+                                    continue
+
+                                print("Format '{}': size expected={}".format(f, hsize(source['formats'][f]['size'])))
+
+                                if not dry_run:    
+                                    try:
+                                        get_file(dir, book, f)
+                                        book['formats'].append(f)
+                                    # except:
+                                    except Exception as msg:
+                                        # A failed download is reported and left out of the statistics.
+                                        print("Unable to get book:", url)
+                                        print(msg)
+                                        continue
+                                    save_metadata(dir, book)
+
+                                # Statistics use the size recorded in the metadata, not the size of the downloaded file.
+                                ebook_kept=True
+                                size=source['formats'][f]['size']
+                                total_size += size 
+                                size_max = size if size>size_max else size_max
+                                if not size_min: 
+                                    size_min = size
+                                else: 
+                                    size_min = size if size<size_min else size_min
+
+                                if not f in total_size_by_format:
+                                    total_size_by_format[f] = size 
+                                else: total_size_by_format[f] +=size
+                                if not f in total_count_by_format:
+                                    total_count_by_format[f] = 1 
+                                else: 
+                                    total_count_by_format[f]+=1
+                                total_format_count +=1
+                            else:    
+                                print ("Format '{}' ignored for {} ({}): No url)".format(f, uuid, book['title']))
+                        if ebook_kept:
+                            # Count the book once per language and once per identifier kind.
+                            total_ebook_count+=1
+                            if not book['languages']:
+                                if not '<unknown>' in language_count:
+                                    language_count['<unknown>'] = 1 
+                                else:
+                                    language_count['<unknown>']+=1
+                            else:
+                                for l in book['languages']:
+                                    if not l in language_count:
+                                        language_count[l] = 1 
+                                    else:
+                                        language_count[l]+=1
+                            if not book['identifiers']:
+                                if not '<unknown>' in identifiers_count:
+                                    identifiers_count['<unknown>'] = 1 
+                                else:
+                                    identifiers_count['<unknown>']+=1
+                            else:
+                                for l in book['identifiers'].keys():
+                                    if not l in identifiers_count:
+                                        identifiers_count[l] = 1 
+                                    else:
+                                        identifiers_count[l]+=1
+
+                    if not dry_run:
+                        # Mark the book "done" once every source format has been downloaded.
+                        update_done_status(book)
+                        if book['source']['status']=="done":
+                            save_metadata(dir, book)
+                            print("Book done:", book['uuid'])
+                    # total_ebook_count+=1
+                else:
+                    print()
+                    print("-->", uuid, "("+book['title']+")")
+                    print ('{} in status "{}": skipped'.format(book['uuid'], status))
+
+    # Final report.
+    print()
+    print("Total count of updated ebooks:", total_ebook_count)
+    print("Total ebooks updated by language:")
+    for l, c in language_count.items():
+        print("   '{}': {}".format(l, c))
+    print("Total ebooks updated by identifiers:")
+    for l, c in identifiers_count.items():
+        print("   '{}': {}".format(l, c))
+    print("Total count of formats:", total_format_count)
+    print("Total count of ebooks by format:")
+    for f, c in total_count_by_format.items():
+        print("\t'{}': {}".format(f, c))
+    print()
+    print("Total size:", hsize(total_size))
+    print("Maximum file size:", hsize(size_max))
+    print("Minimum file size:", hsize(size_min))
+    print("Total size by format:")
+    for f, s in total_size_by_format.items():
+        print("\t'{}': {}".format(f, hsize(s)))
+
+
+
+def get_formats_to_download(book, accepted_formats=[], ignored_formats=[], single_format=False, min_size=0, max_size=0):
+    """Return the list of formats of `book` that should be downloaded.
+
+    Only source formats in status 'todo' are candidates.
+      * single_format: keep the first entry of `accepted_formats` (its order
+        is the preference order) that is a candidate, or nothing when
+        `accepted_formats` is empty.
+      * otherwise: keep the candidates listed in `accepted_formats` when it
+        is given, else the candidates not listed in `ignored_formats`, else
+        all of them. The result follows the iteration order of
+        book['source']['formats'].
+    Candidates whose size is outside [min_size, max_size] are dropped
+    (max_size 0 means no upper limit). A format without a recorded size is
+    dropped when a size limit is set, and kept when there is none.
+    """
+    print("Accepted formats", accepted_formats)
+
+    source=book['source']
+    print("Formats available in source: {}".format(list(source['formats'].keys())))
+    my_formats=[f for f, v in source['formats'].items() if v['status']=='todo']
+    print("Formats in 'todo': {}".format(my_formats))
+
+    if single_format:
+        formats=[]
+        if accepted_formats:
+            for f in accepted_formats:
+                if f in my_formats:
+                    formats=[f]
+                    break
+        else:
+            print("need at least 1 format for ordering")
+    elif accepted_formats:
+        # Filter instead of intersecting sets so the result order is stable.
+        formats=[f for f in my_formats if f in accepted_formats]
+    elif ignored_formats:
+        formats=[f for f in my_formats if f not in ignored_formats]
+    else:
+        formats=my_formats
+
+    print("Formats expected: {}".format(formats))
+
+    download_formats=[]
+    for f in formats:
+        if 'size' not in source['formats'][f]:
+            if min_size or max_size:
+                # An unknown size cannot be checked against the limits.
+                print ("Format '{}' ignored for {}: Size unknown".format(f, book['uuid']))
+            else:
+                # No limit to check: keep the format instead of failing on
+                # the missing 'size' key.
+                download_formats.append(f)
+            continue
+        size = source['formats'][f]['size']
+        if size < min_size or (max_size and size > max_size):
+            print ("Format '{}' ignored for {}: size={} but expected between {} and {}".format(f, book['uuid'], hsize(size), hsize(min_size), hsize(max_size) if max_size else "infinity"))
+            continue
+        download_formats.append(f)
+    return download_formats
+
+
+def update_format_statuses(book,refresh_ignored):
+    """Put the formats of ``book`` back into the 'todo' status, in place.
+
+    A format currently marked 'ignored' is left untouched unless
+    ``refresh_ignored`` is true; every other format is set to 'todo'.
+    One line is printed per format describing what was decided.
+    """
+    for name, entry in book['source']['formats'].items():
+        keep_ignored = entry['status'] == 'ignored' and not refresh_ignored
+        if keep_ignored:
+            print ("Format '{}' ignored: {} ({}))".format(name, book['uuid'], book['title']))
+            continue
+
+        print ("Format '{}' todo: {} ({}))".format(name, book['uuid'], book['title']))
+        # ``entry`` is the very dict stored under this format name.
+        entry['status'] = 'todo'
+
+def filter_ebooks(dir= '.', server='', formats=[], single_format=False, ignored_formats=[], languages=[], identifiers=[], min_size=0, max_size=0, ignore_empty_language=False, ignore_empty_identifiers=False):
+    """Mark ebooks and formats that do not match the filters as 'ignored'.
+
+    Walks ``dir`` and, for every sub-directory for which ``load_metadata``
+    returns a book whose source status is 'todo':
+
+    * the whole book is set to 'ignored' (and saved) when ``has_languages``
+      or ``has_identifiers`` rejects it;
+    * otherwise every source format that is neither already listed in
+      ``book['formats']`` nor selected by ``get_formats_to_download`` is
+      set to 'ignored', and the book is saved if anything changed.
+
+    Books in any other status are reported and skipped.
+
+    ``min_size`` and ``max_size`` are multiplied by 1024*1024 before being
+    compared with format sizes; ``max_size=0`` means no upper bound.
+    With ``single_format`` and no explicit ``formats``, the module-level
+    ``all_ordered_formats`` is used as the preference order.
+    ``server`` is accepted but not used by this function.
+    """
+
+    if single_format: my_formats = formats if formats else all_ordered_formats
+    else: my_formats=formats
+    print("formats=", my_formats)
+
+    min_size=int(min_size)*1024*1024
+    max_size=int(max_size)*1024*1024
+    print ("Format expected between {} and {}".format(hsize(min_size), hsize(max_size) if max_size else "infinity"))
+
+    total_ebook_count=0
+    total_format_count=0
+
+    for root, dirs, files in os.walk(dir, topdown=True):
+        for uuid in dirs:
+            book = load_metadata(root, uuid)
+            if not book:
+                continue
+
+            status=book['source']['status']
+            print()
+            print("-->", uuid, "("+book['title']+")")
+
+            if status!="todo":
+                print ('{} in status "{}": skipped'.format(book['uuid'], status))
+                continue
+
+            # Book-level filters: the first one that rejects the book wins.
+            rejected_by=None
+            if not has_languages(book, languages=languages, ignore_empty_language=ignore_empty_language):
+                rejected_by="languages"
+            elif not has_identifiers(book, identifiers=identifiers, ignore_empty_identifiers=ignore_empty_identifiers):
+                rejected_by="identifiers"
+
+            if rejected_by:
+                book['source']['status']='ignored'
+                print ("{} ignored: {} filtered".format(uuid, rejected_by))
+                # NOTE(review): metadata is loaded from ``root`` but saved
+                # under ``dir`` -- confirm this is what save_metadata expects.
+                save_metadata(dir, book)
+                total_ebook_count+=1
+                continue
+
+            download_formats = get_formats_to_download(book, accepted_formats=my_formats, single_format=single_format, ignored_formats=ignored_formats, max_size=max_size, min_size=min_size)
+
+            # Format-level filter: whatever is neither already present in
+            # book['formats'] nor selected for download gets ignored.
+            save_ebook=False
+            source=book['source']
+            formats_to_ignore=list(set(source['formats'].keys()) - set(book['formats']) - set(download_formats))
+            print("formats to ignore:", formats_to_ignore)
+            for f in formats_to_ignore:
+                if source['formats'][f]['status']!='ignored':
+                    source['formats'][f]['status']='ignored'
+                    print ("{} format ignored: '{}'".format(uuid, f))
+                    total_format_count +=1
+                    save_ebook=True
+            # Only rewrite the metadata when a status actually changed.
+            if save_ebook:
+                save_metadata(dir, book)
+
+    print()
+    print("Total count of newly ignored ebooks:", total_ebook_count)
+    print("Total count of newly formats to ignore:", total_format_count)
+
+def reset_ignored(dir= '.', server=''):
+    """Turn every 'ignored' status found under ``dir`` back into 'todo'.
+
+    For each sub-directory for which ``load_metadata`` returns a book, both
+    the book-level source status and the status of each source format are
+    reset when they are 'ignored'. The metadata is saved only when at least
+    one status was changed. ``server`` is accepted but not used here.
+    """
+    for parent, subdirs, _ in os.walk(dir, topdown=True):
+        for name in subdirs:
+            book = load_metadata(parent, name)
+            if not book:
+                continue
+
+            source = book['source']
+            changed = False
+
+            if source['status'] == "ignored":
+                print ("'{}' status 'ignored' reset to 'todo'".format(book['uuid']))
+                source['status'] = 'todo'
+                changed = True
+
+            for entry in source['formats'].values():
+                if entry['status'] != 'ignored':
+                    continue
+                print ("'{}' format 'ignored' reset to 'todo'".format(book['uuid']))
+                entry['status'] = 'todo'
+                changed = True
+
+            if changed:
+                save_metadata(dir, book)
+
+
+
+# Command-line entry point, only run when the file is executed as a script.
+# fire.Fire() is called without a component; per the Python Fire docs this
+# presumably exposes the module's functions as sub-commands -- confirm there.
+if __name__ == "__main__":
+    fire.Fire()