Skip to content

Instantly share code, notes, and snippets.

@christian-smith
Created January 5, 2019 14:18
Show Gist options
  • Select an option

  • Save christian-smith/aa0e8ddf7221ac7a88731247c87cc29a to your computer and use it in GitHub Desktop.

Select an option

Save christian-smith/aa0e8ddf7221ac7a88731247c87cc29a to your computer and use it in GitHub Desktop.

Revisions

  1. christian-smith created this gist Jan 5, 2019.
    78 changes: 78 additions & 0 deletions webloc2orgmode.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,78 @@
    """
    webloc2orgmode.py
    This script takes Apple webloc files (such as bookmarks made in DEVONthink) and exports them as org-mode entries.
    It also saves the creation timestamp and tag information for each webloc.
    Usage:
    $ pip3 install lxml biplist xattr bs4 requests
    $ touch ~/bookmarks.org
    $ mkdir done
    $ mkdir bookmarks
    (move all webloc files to the bookmarks directory)
    $ python3 webloc2orgmode.py
    All parsed weblocs will have been moved to the done/ directory.
    """

    import biplist
    import glob
    import lxml.html
    import os
    import requests
    import shutil
    import time
    import xattr
    from bs4 import BeautifulSoup
    from lxml import etree
    from struct import unpack
    from time import sleep

    def strip_tag_color(tag):
        """Return a Finder tag's name without its trailing colour marker.

        The tag strings read from the ``_kMDItemUserTags`` attribute appear to
        be stored as the tag name, a newline, and a colour digit. Only the part
        before the first newline is kept, so a tag of any colour yields a clean
        single-line name (previously only a colour digit of 0 was stripped).
        """
        return tag.split('\n', 1)[0]


    def org_heading(url, title, tags):
        """Build the org-mode heading line for one bookmark.

        Args:
            url: Target of the org link.
            title: Page title, or None/empty when none could be determined.
            tags: Already-cleaned tag names; may be empty.

        Returns:
            ``* [[url][title]]``, followed by `` :tag1:tag2:`` when tags exist.
            A missing title becomes ``Undefined``; runs of whitespace
            (including newlines) in the title are collapsed to single spaces
            so the heading always stays on one line.
        """
        clean_title = ' '.join(title.split()) if title else ''
        if not clean_title:
            clean_title = 'Undefined'

        heading = "* [[%s][%s]]" % (url, clean_title)
        if tags:
            heading = "%s :%s:" % (heading, ':'.join(tags))
        return heading


    # Destination org file; resolved once because it is the same for every bookmark.
    org_path = os.path.join(os.path.expanduser('~'), "bookmarks.org")

    for filename in glob.glob('bookmarks/*.webloc'):
        # The URL is read from the second child of the first child of the
        # plist root. Elements are indexed directly instead of using the
        # deprecated getchildren().
        plist_root = etree.parse(filename).getroot()
        url = plist_root[0][1].text

        # Tags are optional: the extended attribute may be absent or
        # unreadable, in which case the bookmark is exported without tags.
        tags = []
        try:
            raw_tags = xattr.getxattr(filename, 'com.apple.metadata:_kMDItemUserTags')
            tags = biplist.readPlistFromString(raw_tags)
        except Exception:
            print("tagerror")

        formatted_tags = [strip_tag_color(tag) for tag in tags]

        # st_birthtime is the file creation time (available on macOS/BSD).
        created = time.strftime(
            '[%Y-%m-%d %a %H:%M:%S %z]',
            time.localtime(os.stat(filename).st_birthtime),
        )

        # Best effort: fetch the page to learn its title. Any failure simply
        # leaves the title undefined. The timeout keeps one dead host from
        # stalling the whole run.
        title = None
        try:
            response = requests.get(url, timeout=30)
            soup = BeautifulSoup(response.content, "lxml")
            if soup.title is not None:
                title = soup.title.string
        except Exception:
            print("url error")

        heading = org_heading(url, title, formatted_tags)
        properties = ":PROPERTIES:\n:CREATED: %s\n:END:" % created

        # Open the org file once per bookmark and close it deterministically.
        with open(org_path, "a", encoding="utf-8") as org_file:
            print(heading, file=org_file)
            print(properties, file=org_file)

        # Make sure the archive directory exists so the move cannot fail after
        # the entry was already written (which would duplicate it on re-run).
        os.makedirs("done", exist_ok=True)
        shutil.move(filename, "done/")