Skip to content

Instantly share code, notes, and snippets.

@thiborose
thiborose / obsidian2html.py
Created October 16, 2023 20:09
Convert each Markdown file in an Obsidian vault to HTML, preserving formatting and images. You can then import the vault into Apple Notes.
import os
import re
# Regex matching Obsidian's embedded-image syntax, e.g. ![[my image.png]].
# Group 1 captures the file name, which may contain only word characters,
# whitespace, dots and hyphens.
old_format_pattern = r'!\[\[([\w\s.-]+)\]\]'
# Define a function to replace the obsidian format with the standard markdown format
def replace_image_links(file_path, match):
relative_filename = match.group(1)
absolute_filename = os.path.abspath(os.path.join(os.path.dirname(file_path), relative_filename))
@thiborose
thiborose / loop_pattern_replace.py
Created December 9, 2022 13:13
Replace each occurrence of a regex pattern with a different value
# Example: replace all occurrences of 4 digits with 4 new random digits in an XML file
import random
import re
# Read the whole input file; the context manager closes the handle as soon
# as the content has been read instead of leaving it to garbage collection.
with open("in.xml", "r", encoding="utf-8") as xml_file:
    content = xml_file.read()

# Alternation (one capture group) of the spelled-out digit words to look for.
numbers = '(one|two|three|four|five|six|seven|eight|nine|zero|null)'
# Four digit words, each separated by exactly one whitespace character.
# The raw f-string keeps `\s` as a regex escape; in a non-raw literal it is
# an invalid string escape that newer Python versions warn about.
pattern = re.compile(rf'{numbers}\s{numbers}\s{numbers}\s{numbers}')
__author__ = "xxxx"
__version__ = "0.0.0"
__date__ = "January 01, 2000"
import sys, argparse
from loguru import logger
# code....
# ...
# set up logging
logging_level = logging.DEBUG if args.debug else logging.INFO
logging.basicConfig(
level=logging_level,
format="[%(asctime)s:%(module)s:%(levelname)s] %(message)s",
handlers=[
logging.FileHandler("logs.txt"),
logging.StreamHandler(sys.stdout)
]
@thiborose
thiborose / conllu.py
Last active July 1, 2021 21:45
Get the CoNLL-U annotation for a given string
def get_conllu(sentence:str, lang="en", to_file:str = None):
"""
Takes a string,
output a file in the CONLLU format
"""
url = "http://lindat.mff.cuni.cz/services/udpipe/api/process"
parameters = {
"data":sentence,
@thiborose
thiborose / spacy_starter_pack.py
Last active June 16, 2021 13:06
A customized spacy pipeline to characterize texts, encompassing an improved sentencizer.
import spacy
from spacy import Language
from spacy.tokens import Doc
import pysbd # improved sentencizer
# Setting up spacy
@Language.component("pysbd_sentence_boundaries")
def pysbd_sentence_boundaries(doc):
"""improved sentence segmenter"""
seg = pysbd.Segmenter(language="fr", clean=False, char_span=True) #Specify language here