import shutil from os.path import expanduser, dirname, abspath, join, isfile from datetime import datetime as dt import lxml.html from biplist import readPlist, writePlist, InvalidPlistException, NotBinaryPlistException from time_uuid import TimeUUID """ TODO: - read/unread USE: - export pocket html file and put in same folder, run app.py ~~~ * safari will save all items for offline reading, if you have a huge collection, safari will be stuck for a while * after saving process completed better to uncheck/check safari in iCloud settings to update on iCloud devices. ~~~ """ # export -> 'https://getpocket.com/export' RIL_FILE = join(dirname(abspath(__file__)), "ril_export.html") SRL_FILE = expanduser('~/Library/Safari/Bookmarks.plist') def backup(): bk = expanduser('~/Library/Safari/Bookmarks~.plist') shutil.copy(SRL_FILE, bk) def parse_ril(): ril_file = open(RIL_FILE, "r") raw_string = lxml.html.fromstring(ril_file.read()) raw_html = raw_string.xpath("//body/ul/li") parsed = [] for item in raw_html: for p in item.iter(): if p.tag == 'a': title = p.text time_added = p.attrib['time_added'] href = p.attrib['href'] tags = [] if len(p.attrib['tags']): # SRL doesnt support tags tags = p.attrib['tags'].split(',') o = (title, time_added, href, tags) parsed.append(o) ril_file.close() return parsed def import_srl(ril_data): try: plist = readPlist(SRL_FILE) except (InvalidPlistException, NotBinaryPlistException), e: print "Not a plist:", e return parent = None sub_items = [] entries = [] ServerID = None AddedLocally = True WebBookmarkType = 'WebBookmarkTypeLeaf' #'Root' -> 'Children' -> 'ItemX'(WebBookmarkTypeList) -> # 'Children' -> 'ItemY' -> 'ReadingListNonSync' for item in plist['Children']: if item.get('Title') == 'com.apple.ReadingList': parent = item for sub_item in item['Children']: if sub_item.get('ReadingListNonSync'): sub_items.append(sub_item) break if not parent: print "couldn't find parent" return else: ServerID = item['Children'][0]['Sync']['ServerID'] Key = item['Children'][0]['Sync']['Key'] for (title, time_added, href, tags) in reversed(ril_data): ############################### # +URIDictionary # |--title :string # | # +Sync # |--ServerID :string # | # +ReadingListNonSync # |--AddedLocally :bool # | # |WebBookmarkType :string # |WebBookmarkUUID :string # |URLString :string # | # +ReadingList # |--DateAdded :date # #print("%s\n%s\n%s\n%s\n" % (title, time_added, href, tags)) WebBookmarkUUID = TimeUUID.convert(long(time_added), randomize=False).get_urn()[9:] have = False for sub_item in sub_items: if sub_item['WebBookmarkUUID'] == WebBookmarkUUID: have = True break if have: continue DateAdded = dt.utcfromtimestamp(long(time_added)) entry = dict() URIDictionary = dict() Sync = dict() ReadingListNonSync = dict() ReadingList = dict() URIDictionary['title'] = title entry['URIDictionary'] = URIDictionary Sync['ServerID'] = ServerID Sync['Key'] = Key entry['Sync'] = Sync ReadingListNonSync['AddedLocally'] = AddedLocally entry['ReadingListNonSync'] = ReadingListNonSync entry['WebBookmarkType'] = WebBookmarkType entry['WebBookmarkUUID'] = WebBookmarkUUID entry['URLString'] = href ReadingList['DateAdded'] = DateAdded entry['ReadingList'] = ReadingList entries.append(entry) parent['Children'].extend(entries) try: if len(entries): writePlist(plist, SRL_FILE) print "~~~ have %d reading list items ~~~" % len(parent['Children']) except (InvalidPlistException, NotBinaryPlistException), e: print "couldnt write plist: ", e if __name__ == '__main__': if not isfile(SRL_FILE): raise Exception("safari bookmarks file not found!") if not isfile(RIL_FILE): raise Exception("pocket export file not found!") backup() import_srl(parse_ril())