Skip to content

Instantly share code, notes, and snippets.

@aolle
Forked from dogboydog/convert.py
Created April 18, 2021 17:33
Show Gist options
  • Select an option

  • Save aolle/6e595650391deef79ffb1c9bb38fb6e9 to your computer and use it in GitHub Desktop.

Select an option

Save aolle/6e595650391deef79ffb1c9bb38fb6e9 to your computer and use it in GitHub Desktop.
Convert Nimbus Notes HTML to Markdown for Joplin
# -------------------------------------------------------------------------
# Nimbus note HTML export to markdown converter
# Extract all zip files containing 'note.html' and convert to markdown
#
# Setup:
# 1) install python 3 for your OS
# 2) install pandoc https://github.com/jgm/pandoc/releases/tag/2.11.4
# on Windows, the .msi will automatically add pandoc to your $PATH
# otherwise add it to your $PATH.
# 3) save this script in the directory where your HTML exports were
# exported. Open a terminal / command prompt and cd to the directory
# where you saved convert.py.
# 4) Issue the command "python convert.py"
# (add the word "debug" afterward for extra output: python convert.py debug)
# 5) To import into Joplin, use File -> Import -> Markdown (Directory)
# and select the 'converted' directory that is created by this script
#
# Happy note-taking. -dogboydog
# -------------------------------------------------------------------------
import os
import pathlib
import re
import shutil
import subprocess
import sys
from os.path import abspath
from zipfile import ZipFile
# Running totals reported at the end of the run.
notes_written = 0
notes_failed = 0
sep = os.path.sep
# Colored output stays on unless 'no-color' is passed or NO_COLOR is set.
color = 'no-color' not in sys.argv[1:] and 'NO_COLOR' not in os.environ
# Debug output: DEBUG env var set to anything but "0", or a 'debug' argument.
debug_on = os.environ.get('DEBUG', "0") != "0" or 'debug' in sys.argv[1:]
clean = True  # set to False to keep html files from conversion
class _c:
    """ANSI escape sequences used to style terminal output.

    Every attribute is an empty string when the module-level ``color`` flag
    is false, so the attributes can be interpolated unconditionally.
    """

    (HEADER, BLUE, CYAN, GREEN, YELLOW, RED, ENDC, BOLD, UNDERLINE) = (
        (
            '\033[95m',    # HEADER
            '\u001b[34m',  # BLUE
            '\033[96m',    # CYAN
            '\033[92m',    # GREEN
            '\u001b[33m',  # YELLOW
            '\033[91m',    # RED
            '\033[0m',     # ENDC (reset)
            '\033[1m',     # BOLD
            '\033[4m',     # UNDERLINE
        )
        if color
        else ('',) * 9
    )
# File suffixes the script looks for: exported notes and the archives
# that contain them.
html_extension, zip_extension = ".html", ".zip"
def log_debug(message):
    """Print *message* in blue, but only when debug output is enabled."""
    if not debug_on:
        return
    print(f"{_c.BLUE}{message}{_c.ENDC}")
def remove_empty_dir(empty_dir):
    """Try to remove *empty_dir*; silently give up if the OS refuses.

    Uses ``os.removedirs``, so parent directories that become empty are
    pruned as well. Any ``OSError`` (for example the directory still has
    content) is ignored on purpose: removal is best-effort.
    """
    try:
        os.removedirs(empty_dir)
        deleted_message = f"Deleted empty directory '{empty_dir}'"
        log_debug(deleted_message)
    except OSError:
        # Not removable (typically not empty) -- leave it in place.
        return
def remove_empty_dirs(path):
    """Recursively delete the empty directories found beneath *path*."""
    # Bottom-up walk: the deepest directories are visited first, so a parent
    # that only contained empty children can be removed afterwards.
    for parent, child_dirs, _files in os.walk(path, topdown=False):
        for child in child_dirs:
            target = os.path.realpath(os.path.join(parent, child))
            remove_empty_dir(target)
def clean_up():
    """Delete leftover web assets under 'converted' and prune empty directories.

    Does nothing when the module-level ``clean`` flag is false. Files whose
    names end with one of the known font / stylesheet suffixes are unlinked,
    then ``remove_empty_dirs`` is run over the 'converted' tree.
    """
    if not clean:
        return
    # str.endswith accepts a tuple, so each file is tested (and unlinked)
    # at most once no matter how many suffixes it matches.
    clean_suffixes = (".woff2", ".css", ".woff",
                      ".ttf", "icomoon.svg", "icomoon.eot")
    for directory, _subdirs, filelist in os.walk('converted'):
        for f in filelist:
            if f.endswith(clean_suffixes):
                os.unlink(os.path.join(directory, f))
    remove_empty_dirs("converted")
def write_note(html_file, markdown_destination):
    """Convert one HTML note to Markdown with pandoc and write it to disk.

    Args:
        html_file: path of the HTML file handed to pandoc.
        markdown_destination: path of the Markdown file to create.

    Increments the module-level ``notes_written`` counter on success and
    ``notes_failed`` when pandoc cannot be started or exits non-zero.
    """
    global notes_written, notes_failed
    print(f"Writing markdown to {markdown_destination}")
    try:
        # Pass the argument list directly, without a shell. Combining a list
        # with shell=True on POSIX runs only "pandoc" and hands the remaining
        # items to the shell itself, so pandoc never receives the file name.
        pandoc_run = subprocess.run(
            ["pandoc", html_file,
             "--from", "html", "--to", "markdown_strict-raw_html"],
            capture_output=True)
    except OSError as err:
        # pandoc missing from PATH or not executable: count it as a failed
        # note instead of aborting the whole run.
        print(err)
        print(f"Failed to convert {html_file}")
        notes_failed += 1
        return
    if pandoc_run.returncode != 0:
        # Error text is only displayed, so tolerate undecodable bytes.
        print(pandoc_run.stderr.decode(errors="replace"))
        print(f"Failed to convert {html_file}")
        notes_failed += 1
        return
    markdown_content = pandoc_run.stdout.decode()
    log_debug(markdown_content)
    with open(markdown_destination, "w", encoding="utf-8") as markdown_fp:
        markdown_fp.write(markdown_content)
    notes_written += 1
# Step 1: find every zip below the current directory, extract it under
# 'converted/<same relative directory>' and rename each extracted HTML file
# after the zip it came from.
print("Searching for zip files containing HTML to convert...")
for directory, subdirlist, filelist in os.walk('.'):
    for f in filelist:
        if not f.endswith(zip_extension):
            continue
        print(f"Found zipped note: {f}")
        # Bound as 'archive' rather than 'zip' so the builtin zip() is not
        # shadowed for the rest of the module.
        with ZipFile(f"{directory}{sep}{f}", 'r') as archive:
            converted_dir = f"converted{sep}{directory}"
            pathlib.Path(converted_dir).resolve().mkdir(
                parents=True, exist_ok=True)
            archive.extractall(converted_dir)
            # Same for every member of this archive: the zip's own path with
            # its extension swapped for ".html".
            note_new_filename = (
                archive.filename[:-len(zip_extension)] + ".html")
            for file_in_zip in archive.infolist():
                if file_in_zip.is_dir():
                    continue
                if not file_in_zip.filename.lower().endswith(html_extension):
                    continue
                old_path = pathlib.Path(
                    f"{converted_dir}{sep}{file_in_zip.filename}").resolve()
                new_path = pathlib.Path(
                    f"converted{sep}{note_new_filename}")
                log_debug(f"Renaming {old_path} to {new_path}")
                # NOTE(review): if one archive holds several HTML files they
                # all map to the same new_path and the last one wins.
                shutil.move(str(old_path), str(new_path))
# Step 2: convert every HTML file found under 'converted' to Markdown next
# to it, then clean up and report the totals.
print("Will try to convert all HTML notes in the current directory to Markdown")
# Kept as a module-level name because other code in this file may read it.
converted_dir = "converted"
for directory, subdirlist, filelist in os.walk(converted_dir):
    for f in filelist:
        if not f.endswith(html_extension):
            continue
        # Strip only the trailing extension; str.replace would also remove
        # any ".html" that appears earlier in the file name.
        note_name = f[:-len(html_extension)].strip()
        html_note = f"{directory}{sep}{f}"
        print(f"Found HTML note: {html_note}")
        parent_dir_pathlib = pathlib.Path(directory).resolve()
        log_debug(f"mkdir {parent_dir_pathlib}")
        parent_dir_pathlib.mkdir(parents=True, exist_ok=True)
        markdown_destination = f"{parent_dir_pathlib}{sep}{note_name}.md"
        write_note(html_note, markdown_destination)
        if clean:
            # The HTML copy is removed once write_note has been attempted.
            os.unlink(html_note)
clean_up()
print(f"\n{_c.GREEN}Wrote {notes_written} notes.{_c.ENDC}")
if notes_failed > 0:
    print(f"{_c.RED}{notes_failed} notes failed to convert :( {_c.ENDC}")
@palashman
Copy link

palashman commented Aug 20, 2025

Has anyone tried to improve the script, so that it would not break lines or add an empty line after each line of text, and would handle tables properly, etc.?

I just copy-pasted the same page from Nimbus directly into Obsidian and it looks much, much better. You will spend a lot of time fixing the script's output; it would be much faster to just copy-paste -))

@palashman
Copy link

UPDATE:
Use Jimmy. It's the best converter from Nimbus to Markdown!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment