Last active
May 26, 2019 15:29
-
-
Save pushpendrapratap/ed2dd1714bad002b05a25e90042ea93a to your computer and use it in GitHub Desktop.
Revisions
-
pushpendrapratap revised this gist
May 26, 2019 . No changes.There are no files selected for viewing
-
pushpendrapratap created this gist
May 2, 2019 .There are no files selected for viewing
"""Parse a Netscape-format bookmarks HTML export into nested dictionaries.

The public entry point is :func:`parse`; :func:`process_dir` and
:func:`get_node_data` walk the parsed tree and only rely on the node
interface they use (iteration, ``.name``, ``.text``, ``.get()``, ``.find()``
and the sibling links).
"""

# copied from https://github.com/bookmarks-tools/bookmarks-parser/blob/master/bookmarks_parser/bookmarks_parser.py


def get_node_data(node):
    """Collect the bookmark or folder fields held by one ``<dt>`` node.

    Args:
        node: A ``<dt>`` element whose children are inspected.

    Returns:
        A dict describing the entry. An ``<a>`` child produces a
        ``'bookmark'`` entry, an ``<h3>`` child a ``'folder'`` entry. For
        folders the nested ``<dl>`` element, when one is found, is stored
        under the temporary key ``'__dir_dl'`` so the caller can recurse
        into it. The dict is empty when the node has no recognised child.
    """
    data = {}
    for child in node:
        if child.name == 'a':
            data['type'] = 'bookmark'
            data['url'] = child.get('href')
            data['title'] = child.text
            data['add_date'] = child.get('add_date')
            data['icon'] = child.get('icon')
            # only in FF
            icon_uri = child.get('icon_uri')
            if icon_uri:
                data['icon_uri'] = icon_uri
            tags = child.get('tags')
            if tags:
                data['tags'] = tags.split(',')
        elif child.name == 'h3':
            data['type'] = 'folder'
            data['title'] = child.text
            data['add_date'] = child.get('add_date')
            data['last_modified'] = child.get('last_modified')
            data['ns_root'] = None
            # for Bookmarks Toolbar in FF and Bookmarks bar in Chrome
            if child.get('personal_toolbar_folder'):
                data['ns_root'] = 'toolbar'
            # FF Other Bookmarks
            if child.get('unfiled_bookmarks_folder'):
                data['ns_root'] = 'other_bookmarks'
        elif child.name == 'dl':
            # store DL element reference for further processing the child nodes
            data['__dir_dl'] = child

    # ``data.get('type')`` rather than ``data['type']``: a <dt> without an
    # <a> or <h3> child leaves ``data`` without a 'type' key.
    if data.get('type') == 'folder' and '__dir_dl' not in data:
        # The folder's <dl> may sit inside a following <dd> sibling instead
        # of inside the <dt> itself.
        sibling = node.next_sibling
        if getattr(sibling, 'name', None) == 'dd':
            nested_dl = sibling.find('dl')
            if nested_dl is not None:
                data['__dir_dl'] = nested_dl
    return data


def process_dir(bookmark_dir, level):
    """Convert the ``<dt>`` entries of one ``<dl>`` into a list of dicts.

    Args:
        bookmark_dir: The ``<dl>`` element whose direct children are walked.
        level: Nesting depth; ``0`` for the outermost ``<dl>``.

    Returns:
        A list of entry dicts as produced by :func:`get_node_data`, with
        folder contents stored under ``'children'``. At level 0, entries
        without an ``'ns_root'`` are grouped under a synthetic root dict
        (``'ns_root': 'menu'``) that is appended after the other entries.
    """
    items = []
    menu_root = None
    for child in bookmark_dir:
        if child.name != 'dt':
            continue
        item_data = get_node_data(child)
        if not item_data:
            # A <dt> with no recognised content carries nothing to report.
            continue

        # Replace the temporary <dl> reference with the parsed children.
        nested_dl = item_data.pop('__dir_dl', None)
        if nested_dl is not None:
            item_data['children'] = process_dir(nested_dl, level + 1)

        if level == 0 and not item_data.get('ns_root'):
            if menu_root is None:
                # ``previous_sibling`` is None for a leading <dt>, so the
                # name is read defensively.
                previous_name = getattr(child.previous_sibling, 'name', None)
                if previous_name == 'dt':
                    # For chrome
                    root_title = "Other bookmarks"
                else:
                    # for FF
                    root_title = "Bookmarks Menu"
                menu_root = {'title': root_title, 'children': [], 'ns_root': 'menu'}
            menu_root['children'].append(item_data)
        else:
            items.append(item_data)

    if menu_root is not None:
        items.append(menu_root)
    return items


def parse(file_path):
    """Parse a bookmarks HTML file into a nested list of dicts.

    Args:
        file_path: Path of the exported bookmarks HTML file.

    Returns:
        The result of :func:`process_dir` for the first ``<dl>`` in the
        document, or an empty list when the document has no ``<dl>``.
    """
    # Imported here, in its only user, so the tree-walking helpers above stay
    # importable without bs4/html5lib installed.
    from bs4 import BeautifulSoup

    # Opened in binary mode so decoding is left to the parser.
    with open(file_path, 'rb') as f:
        soup = BeautifulSoup(f, "html5lib")

    root_dl = soup.find('dl')
    if root_dl is None:
        return []
    return process_dir(root_dl, 0)