Thibo Rosemplatt thiborose

thiborose / obsidian2html.py

Created October 16, 2023 20:09

Convert each Markdown file in an Obsidian vault to HTML, preserving formatting and images. You can then import the vault into Apple Notes.

	import os
	import re

	# regex to match the image format of Obsidian
	old_format_pattern = r'!\[\[([\w\s.-]+)\]\]'

	# Define a function to replace the obsidian format with the standard markdown format
	def replace_image_links(file_path, match):
	relative_filename = match.group(1)
	absolute_filename = os.path.abspath(os.path.join(os.path.dirname(file_path), relative_filename))

thiborose / loop_pattern_replace.py

Created December 9, 2022 13:13

Replace each occurrence of a regex pattern with a different value

	# example - replace all occurrences of 4 digits (written out as words, e.g. "one two three four") with 4 new random digits in an XML file

	import random
	import re

	# Read the whole input up front; the context manager closes the handle
	# deterministically instead of leaving it open until garbage collection.
	with open("in.xml", "r", encoding="utf-8") as in_file:
		content = in_file.read()

	# One spelled-out digit. The alternation must use a bare `|`: an escaped
	# `\|` matches a literal pipe character, so the group would never match
	# any of the words.
	numbers = r'(one|two|three|four|five|six|seven|eight|nine|zero|null)'
	# Four digit words, each separated by exactly one whitespace character.
	# Raw f-string so `\s` reaches the regex engine as written rather than
	# being treated as an (invalid) string escape.
	pattern = re.compile(rf'{numbers}\s{numbers}\s{numbers}\s{numbers}')

thiborose / template_new_program.py

Created August 2, 2022 13:56

thiborose / logging_setup.py

Last active January 20, 2022 18:35

	# set up logging
	logging_level = logging.DEBUG if args.debug else logging.INFO

	logging.basicConfig(
	level=logging_level,
	format="[%(asctime)s:%(module)s:%(levelname)s] %(message)s",
	handlers=[
	logging.FileHandler("logs.txt"),
	logging.StreamHandler(sys.stdout)
	]

thiborose / conllu.py

Last active July 1, 2021 21:45

Get the CoNLL-U annotation for a given string

	def get_conllu(sentence:str, lang="en", to_file:str = None):
	"""
	Takes a string,
	output a file in the CONLLU format
	"""

	url = "http://lindat.mff.cuni.cz/services/udpipe/api/process"

	parameters = {
	"data":sentence,

thiborose / spacy_starter_pack.py

Last active June 16, 2021 13:06

A customized spaCy pipeline to characterize texts, including an improved sentencizer.

	import spacy
	from spacy import Language
	from spacy.tokens import Doc
	import pysbd # improved sentencizer

	# Setting up spacy
	@Language.component("pysbd_sentence_boundaries")
	def pysbd_sentence_boundaries(doc):
	"""improved sentence segmenter"""
	seg = pysbd.Segmenter(language="fr", clean=False, char_span=True) #Specify language here