I hereby claim:
- I am psorianom on github.
- I am psoriano (https://keybase.io/psoriano) on keybase.
- I have a public key ASBEtv4RYHXAyi-Dzj24fMUzLFjCWqwBS88Cg8Oxw0AY4Qo
To claim this, I am signing this object:
I hereby claim:
To claim this, I am signing this object:
| from importlib import import_module | |
| from pprint import pprint | |
| from typing import List, Dict | |
| from dash.development.base_component import Component | |
| from dash_html_components import Div | |
| from dash_html_components import P ,Mark | |
| from dash_interface.helper import serialize_components |
| """ | |
| Class that inherits MosesTokenizer and adds a method which returns the spans. Kinda flaky with the escape, unescape, | |
| detokenize situation, so watch out! | |
| """ | |
| from sacremoses import MosesTokenizer, MosesDetokenizer | |
| class MosesTokenizerSpans(MosesTokenizer): | |
| def __init__(self, lang="en", custom_nonbreaking_prefixes_file=None): | |
| MosesTokenizer.__init__(self, lang=lang, |
| '''Generates a synthetic dataset (csv) of persons to test the SNU_assignator | |
| Usage: | |
| SNU_gen.py <o> [options] | |
| Arguments: | |
| <o> An output path to store the synthetic data csv | |
| -n PER Number of persons to generate [default: 2000:int] | |
| -f FIL Representation proportion of the filiere. Ex: "0.1,0.1,...,0.1" (default: None) | |
| -r RES Representation proportion of the residence Ex: "0.1,0.1,...,0.1" (default: None) |
| import xml.etree.ElementTree | |
| import glob | |
| texts = [] | |
| all_files = list(glob.glob('./extracted/*.xml')) | |
| n_files = len(all_files) | |
| with open("all_capp_new.txt", "w") as filo: | |
| for i,f in enumerate(all_files): | |
| print("Treating file {0} => {1}/{2}\n".format(f, i+1 , n_files)) | |
| e = xml.etree.ElementTree.parse(f).getroot() |
| # -*- coding: utf-8 -*- | |
| # Authors: Olivier Grisel <[email protected]> | |
| # Mathieu Blondel <[email protected]> | |
| # Lars Buitinck <[email protected]> | |
| # Robert Layton <[email protected]> | |
| # Jochen Wersdörfer <[email protected]> | |
| # Roman Sinayev <[email protected]> | |
| # | |
| # License: BSD 3 clause | |
| """ |