Last active
November 21, 2015 11:47
-
-
Save randombrein/4c6de353330b7d8febfc to your computer and use it in GitHub Desktop.
Revisions
-
randombrein revised this gist
Jul 13, 2014 . 1 changed file with 27 additions and 25 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,19 +1,32 @@ import shutil from os.path import expanduser, dirname, abspath, join, isfile from datetime import datetime as dt import lxml.html from biplist import readPlist, writePlist, InvalidPlistException, NotBinaryPlistException from time_uuid import TimeUUID """ TODO: - read/unread USE: - export pocket html file and put in same folder, run app.py ~~~ * safari will save all items for offline reading, if you have a huge collection, safari will be stuck for a while * after saving process completed better to uncheck/check safari in iCloud settings to update on iCloud devices. ~~~ """ # export -> 'https://getpocket.com/export' RIL_FILE = join(dirname(abspath(__file__)), "ril_export.html") SRL_FILE = expanduser('~/Library/Safari/Bookmarks.plist') def backup(): bk = expanduser('~/Library/Safari/Bookmarks~.plist') shutil.copy(SRL_FILE, bk) def parse_ril(): @@ -53,10 +66,6 @@ def import_srl(ril_data): sub_items = [] entries = [] ServerID = None AddedLocally = True WebBookmarkType = 'WebBookmarkTypeLeaf' @@ -79,28 +88,22 @@ def import_srl(ril_data): ServerID = item['Children'][0]['Sync']['ServerID'] Key = item['Children'][0]['Sync']['Key'] for (title, time_added, href, tags) in reversed(ril_data): ############################### # +URIDictionary # |--title :string # | # +Sync # |--ServerID :string # | # +ReadingListNonSync # |--AddedLocally :bool # | # |WebBookmarkType :string # |WebBookmarkUUID :string # |URLString :string # | # +ReadingList # |--DateAdded :date # @@ -118,7 +121,7 @@ def import_srl(ril_data): continue DateAdded = dt.utcfromtimestamp(long(time_added)) entry = dict() URIDictionary = dict() Sync = dict() @@ -132,17 +135,13 @@ def 
import_srl(ril_data): Sync['Key'] = Key entry['Sync'] = Sync ReadingListNonSync['AddedLocally'] = AddedLocally entry['ReadingListNonSync'] = ReadingListNonSync entry['WebBookmarkType'] = WebBookmarkType entry['WebBookmarkUUID'] = WebBookmarkUUID entry['URLString'] = href ReadingList['DateAdded'] = DateAdded entry['ReadingList'] = ReadingList @@ -151,13 +150,16 @@ def import_srl(ril_data): parent['Children'].extend(entries) try: if len(entries): writePlist(plist, SRL_FILE) print "~~~ have %d reading list items ~~~" % len(parent['Children']) except (InvalidPlistException, NotBinaryPlistException), e: print "couldnt write plist: ", e if __name__ == '__main__': if not isfile(SRL_FILE): raise Exception("safari bookmarks file not found!") if not isfile(RIL_FILE): raise Exception("pocket export file not found!") backup() import_srl(parse_ril()) -
randombrein revised this gist
Jun 19, 2014 . 1 changed file with 7 additions and 5 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,3 +1,5 @@ from os.path import expanduser from shutil import copyfile from datetime import datetime as dt import lxml.html from biplist import readPlist, writePlist, InvalidPlistException, NotBinaryPlistException @@ -8,8 +10,8 @@ # 1-read/unread SRL_FILE = expanduser('~/Library/Safari/Bookmarks.plist') SRL_FILE_BK = expanduser('~/Library/Safari/Bookmarks~.plist') # export -> 'https://getpocket.com/export' RIL_FILE = "/path/to/your/ril_export.html" @@ -116,8 +118,7 @@ def import_srl(ril_data): continue DateAdded = dt.utcfromtimestamp(long(time_added)) entry = dict() URIDictionary = dict() Sync = dict() @@ -133,7 +134,7 @@ def import_srl(ril_data): ReadingListNonSync['ArchiveOnDisk'] = ArchiveOnDisk ReadingListNonSync['FetchResult'] = FetchResult # ReadingListNonSync['DateLastFetched'] = DateLastFetched ReadingListNonSync['AddedLocally'] = AddedLocally entry['ReadingListNonSync'] = ReadingListNonSync @@ -150,6 +151,7 @@ def import_srl(ril_data): parent['Children'].extend(entries) try: if len(entries): copyfile(SRL_FILE, SRL_FILE_BK) writePlist(plist, SRL_FILE) print "~~~ have %d reading list items ~~~" % len(parent['Children']) except (InvalidPlistException, NotBinaryPlistException), e: -
randombrein revised this gist
Jun 16, 2014 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -78,7 +78,7 @@ def import_srl(ril_data): Key = item['Children'][0]['Sync']['Key'] for (title, time_added, href, tags) in reversed(ril_data): ############################### # +URIDictionary # |--title :string -
randombrein created this gist
Jun 16, 2014 .There are no files selected for viewing
"""Import a Pocket (Read It Later) HTML export into Safari's Reading List.

Python 2 script (it relies on ``long`` and ``UUID.get_urn``).  The Pocket
export at RIL_FILE is parsed and one Reading List entry per link is appended
to the binary plist at SRL_FILE.
"""
from datetime import datetime as dt

import lxml.html
from biplist import readPlist, writePlist, InvalidPlistException, NotBinaryPlistException
from time_uuid import TimeUUID

##########
# TODO:
# 1-read/unread

# '~/Library/Safari/Bookmarks.plist'
SRL_FILE = "/path/to/your/Bookmarks.plist"
# export -> 'https://getpocket.com/export'
RIL_FILE = "/path/to/your/ril_export.html"


def parse_ril():
    """Parse the Pocket export at RIL_FILE.

    Returns:
        A list of ``(title, time_added, href, tags)`` tuples, one per ``<a>``
        element found below ``//body/ul/li``.  ``time_added`` and ``href``
        are the raw attribute strings; ``tags`` is a (possibly empty) list.

    Raises:
        KeyError: if a link lacks the ``time_added`` or ``href`` attribute.
    """
    # The context manager closes the file even if reading or parsing raises.
    with open(RIL_FILE, "r") as ril_file:
        document = lxml.html.fromstring(ril_file.read())

    parsed = []
    for item in document.xpath("//body/ul/li"):
        for link in item.iter('a'):
            # SRL doesnt support tags; they are parsed but never imported.
            # A link without a tags attribute is treated as untagged.
            raw_tags = link.attrib.get('tags', '')
            tags = raw_tags.split(',') if raw_tags else []
            parsed.append(
                (link.text, link.attrib['time_added'], link.attrib['href'], tags))
    return parsed


def _find_reading_list(plist):
    """Return the top-level 'com.apple.ReadingList' folder, or None."""
    # 'Root' -> 'Children' -> 'ItemX'(WebBookmarkTypeList)
    for item in plist['Children']:
        if item.get('Title') == 'com.apple.ReadingList':
            return item
    return None


def _find_sync_info(children):
    """Return (ServerID, Key) from the first child carrying both, or None."""
    for child in children:
        sync = child.get('Sync')
        if sync and 'ServerID' in sync and 'Key' in sync:
            return sync['ServerID'], sync['Key']
    return None


def import_srl(ril_data):
    """Append Pocket items to the Reading List stored in SRL_FILE.

    Args:
        ril_data: iterable of ``(title, time_added, href, tags)`` tuples as
            produced by ``parse_ril``.

    Items whose generated WebBookmarkUUID already exists in the Reading List
    are skipped.  Problems (unreadable plist, missing Reading List folder,
    no existing item to copy Sync info from, write failure) are reported on
    stdout and the function returns without raising.
    """
    try:
        plist = readPlist(SRL_FILE)
    except (InvalidPlistException, NotBinaryPlistException) as e:
        print("Not a plist: %s" % (e,))
        return

    parent = _find_reading_list(plist)
    if parent is None:
        print("couldn't find parent")
        return

    # New entries reuse the ServerID/Key of an item that is already in the
    # list.  Without such an item there is nothing to copy, so bail out
    # instead of failing with IndexError/KeyError.
    children = parent.get('Children', [])
    sync_info = _find_sync_info(children)
    if sync_info is None:
        print("couldn't find Sync info on any existing reading list item")
        return
    ServerID, Key = sync_info

    # 'Children' -> 'ItemY' -> 'ReadingListNonSync'
    # UUIDs of the items already present, for O(1) duplicate detection.
    existing_uuids = set(
        child.get('WebBookmarkUUID')
        for child in children
        if child.get('ReadingListNonSync'))

    entries = []
    for (title, time_added, href, tags) in ril_data:
        ###############################
        # +URIDictionary
        # |--title              :string
        # |
        # +Sync
        # |--ServerID           :string
        # |--Key                :string
        # |
        # +ReadingListNonSync
        # |--ArchiveOnDisk      :bool
        # |--FetchResult        :number
        # |--DateLastFetched    :date
        # |--AddedLocally       :bool
        # |
        # |WebBookmarkType      :string
        # |WebBookmarkUUID      :string
        # |URLString            :string
        # |
        # +ReadingList
        # |--PreviewText        :string
        # |--DateAdded          :date
        #
        timestamp = long(time_added)

        # randomize=False is requested so that re-running the import yields
        # the same UUID for the same item.  [9:] presumably strips the
        # leading 'urn:uuid:' prefix of the URN -- TODO confirm.
        WebBookmarkUUID = TimeUUID.convert(timestamp, randomize=False).get_urn()[9:]
        if WebBookmarkUUID in existing_uuids:
            continue

        DateAdded = dt.utcfromtimestamp(timestamp)
        DateLastFetched = DateAdded  # TODO

        entries.append({
            'URIDictionary': {'title': title},
            'Sync': {'ServerID': ServerID, 'Key': Key},
            'ReadingListNonSync': {
                'ArchiveOnDisk': True,
                'FetchResult': 0,
                'DateLastFetched': DateLastFetched,
                'AddedLocally': True,
            },
            'WebBookmarkType': 'WebBookmarkTypeLeaf',
            'WebBookmarkUUID': WebBookmarkUUID,
            'URLString': href,
            'ReadingList': {'PreviewText': title, 'DateAdded': DateAdded},
        })

    parent['Children'].extend(entries)

    # Only rewrite the plist when something was actually added.
    if entries:
        try:
            writePlist(plist, SRL_FILE)
        except (InvalidPlistException, NotBinaryPlistException) as e:
            print("couldnt write plist:  %s" % (e,))
            return
    print("~~~ have %d reading list items ~~~" % len(parent['Children']))


if __name__ == '__main__':
    import_srl(parse_ril())