pushpendrapratap · May 26, 2019 15:29 · pushpendrapratap · May 2, 2019
diff --git a/g-bookmark-parser.py b/g-bookmark-parser.py
 from bs4 import BeautifulSoup


 # copied from https://github.com/bookmarks-tools/bookmarks-parser/blob/master/bookmarks_parser/bookmarks_parser.py

 def get_node_data(node):
    data = {}
    for child in node:
        if child.name == 'a':
            data['type'] = 'bookmark'
            data['url'] = child.get('href')
            data['title'] = child.text
            data['add_date'] = child.get('add_date')
            data['icon'] = child.get('icon')
            # only in FF
            icon_uri = child.get('icon_uri')
            if icon_uri:
                data['icon_uri'] = icon_uri
            tags = child.get('tags')
            if tags:
                data['tags'] = tags.split(',')
        elif child.name == 'h3':
            data['type'] = 'folder'
            data['title'] = child.text
            data['add_date'] = child.get('add_date')
            data['last_modified'] = child.get('last_modified')

            data['ns_root'] = None
            # for Bookmarks Toolbar in FF and Bookmarks bar in Chrome
            if child.get('personal_toolbar_folder'):
                data['ns_root'] = 'toolbar'
            # FF Other Bookmarks
            if child.get('unfiled_bookmarks_folder'):
                data['ns_root'] = 'other_bookmarks'
        elif child.name == 'dl':
            # store DL element reference for further processing the child nodes
            data['__dir_dl'] = child

    if data['type'] == 'folder' and not data.get('__dir_dl'):
        if node.next_sibling and node.next_sibling.name == "dd":
            dls = node.next_sibling.find_all('dl')
            if dls:
                data['__dir_dl'] = dls[0]
    return data


 def process_dir(bookmark_dir, level):
    items = []
    menu_root = None
    for child in bookmark_dir:
        if child.name != 'dt':
            continue
        item_data = get_node_data(child)
        if level == 0 and (not item_data.get('ns_root')):
            if menu_root is None:
                # For chrome
                if child.previous_sibling.name == "dt":
                    menu_root = {'title': "Other bookmarks", 'children': [], 'ns_root': 'menu'}
                # for FF
                else:
                    menu_root = {'title': "Bookmarks Menu", 'children': [], 'ns_root': 'menu'}
            if item_data.get('__dir_dl'):
                item_data['children'] = process_dir(item_data['__dir_dl'], level + 1)
                del item_data['__dir_dl']
            menu_root['children'].append(item_data)
        else:
            if item_data.get('__dir_dl'):
                item_data['children'] = process_dir(item_data['__dir_dl'], level + 1)
                del item_data['__dir_dl']
            items.append(item_data)
    if menu_root:
        items.append(menu_root)
    return items


 def parse(file_path):
    with open(file_path, 'rb') as f:
        soup = BeautifulSoup(f, "html5lib")
    dls = soup.find_all('dl')
    bookmarks = process_dir(dls[0], 0)
    return bookmarks
	from bs4 import BeautifulSoup


	# copied from https://github.com/bookmarks-tools/bookmarks-parser/blob/master/bookmarks_parser/bookmarks_parser.py

	def get_node_data(node):
	data = {}
	for child in node:
	if child.name == 'a':
	data['type'] = 'bookmark'
	data['url'] = child.get('href')
	data['title'] = child.text
	data['add_date'] = child.get('add_date')
	data['icon'] = child.get('icon')
	# only in FF
	icon_uri = child.get('icon_uri')
	if icon_uri:
	data['icon_uri'] = icon_uri
	tags = child.get('tags')
	if tags:
	data['tags'] = tags.split(',')
	elif child.name == 'h3':
	data['type'] = 'folder'
	data['title'] = child.text
	data['add_date'] = child.get('add_date')
	data['last_modified'] = child.get('last_modified')

	data['ns_root'] = None
	# for Bookmarks Toolbar in FF and Bookmarks bar in Chrome
	if child.get('personal_toolbar_folder'):
	data['ns_root'] = 'toolbar'
	# FF Other Bookmarks
	if child.get('unfiled_bookmarks_folder'):
	data['ns_root'] = 'other_bookmarks'
	elif child.name == 'dl':
	# store DL element reference for further processing the child nodes
	data['__dir_dl'] = child

	if data['type'] == 'folder' and not data.get('__dir_dl'):
	if node.next_sibling and node.next_sibling.name == "dd":
	dls = node.next_sibling.find_all('dl')
	if dls:
	data['__dir_dl'] = dls[0]
	return data


	def process_dir(bookmark_dir, level):
	items = []
	menu_root = None
	for child in bookmark_dir:
	if child.name != 'dt':
	continue
	item_data = get_node_data(child)
	if level == 0 and (not item_data.get('ns_root')):
	if menu_root is None:
	# For chrome
	if child.previous_sibling.name == "dt":
	menu_root = {'title': "Other bookmarks", 'children': [], 'ns_root': 'menu'}
	# for FF
	else:
	menu_root = {'title': "Bookmarks Menu", 'children': [], 'ns_root': 'menu'}
	if item_data.get('__dir_dl'):
	item_data['children'] = process_dir(item_data['__dir_dl'], level + 1)
	del item_data['__dir_dl']
	menu_root['children'].append(item_data)
	else:
	if item_data.get('__dir_dl'):
	item_data['children'] = process_dir(item_data['__dir_dl'], level + 1)
	del item_data['__dir_dl']
	items.append(item_data)
	if menu_root:
	items.append(menu_root)
	return items


	def parse(file_path):
	with open(file_path, 'rb') as f:
	soup = BeautifulSoup(f, "html5lib")
	dls = soup.find_all('dl')
	bookmarks = process_dir(dls[0], 0)
	return bookmarks