This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import re | |
| # regex to match the image format of Obsidian | |
| old_format_pattern = r'!\[\[([\w\s.-]+)\]\]' | |
| # Define a function to replace the Obsidian image format with the standard Markdown format | |
| def replace_image_links(file_path, match): | |
| relative_filename = match.group(1) | |
| absolute_filename = os.path.abspath(os.path.join(os.path.dirname(file_path), relative_filename)) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # example - replace all occurrences of 4 digits with 4 new random digits in an XML file | |
| import random | |
| import re | |
# Read the whole input document up front; the context manager closes the
# file handle as soon as the text is in memory instead of leaking it.
with open("in.xml", "r", encoding="utf-8") as xml_file:
    content = xml_file.read()

# Alternation matching a single spelled-out digit ("null" is accepted
# alongside "zero").
numbers = '(one|two|three|four|five|six|seven|eight|nine|zero|null)'

# Four number words, each separated by exactly one whitespace character.
# The raw f-string keeps `\s` as a regex escape rather than an invalid
# string-literal escape sequence.
pattern = re.compile(rf'{numbers}\s{numbers}\s{numbers}\s{numbers}')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| __author__ = "xxxx" | |
| __version__ = "0.0.0" | |
| __date__ = "January 01, 2000" | |
| import sys, argparse | |
| from loguru import logger | |
| # code.... | |
| # ... |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # set up logging | |
| logging_level = logging.DEBUG if args.debug else logging.INFO | |
| logging.basicConfig( | |
| level=logging_level, | |
| format="[%(asctime)s:%(module)s:%(levelname)s] %(message)s", | |
| handlers=[ | |
| logging.FileHandler("logs.txt"), | |
| logging.StreamHandler(sys.stdout) | |
| ] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def get_conllu(sentence:str, lang="en", to_file:str = None): | |
| """ | |
| Takes a string, | |
| output a file in the CONLLU format | |
| """ | |
| url = "http://lindat.mff.cuni.cz/services/udpipe/api/process" | |
| parameters = { | |
| "data":sentence, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import spacy | |
| from spacy import Language | |
| from spacy.tokens import Doc | |
| import pysbd # improved sentencizer | |
| # Setting up spacy | |
| @Language.component("pysbd_sentence_boundaries") | |
| def pysbd_sentence_boundaries(doc): | |
| """improved sentence segmenter""" | |
| seg = pysbd.Segmenter(language="fr", clean=False, char_span=True) #Specify language here |