from numpy.random import choice as random_choice, randint as random_randint, rand MAX_INPUT_LEN = 40 AMOUNT_OF_NOISE = 0.2 / MAX_INPUT_LEN CHARS = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .") def add_noise_to_string(a_string, amount_of_noise): """Add some artificial spelling mistakes to the string""" if rand() < amount_of_noise * len(a_string): # Replace a character with a random character random_char_position = random_randint(len(a_string)) a_string = a_string[:random_char_position] + random_choice(CHARS[:-1]) + a_string[random_char_position + 1:] if rand() < amount_of_noise * len(a_string): # Delete a character random_char_position = random_randint(len(a_string)) a_string = a_string[:random_char_position] + a_string[random_char_position + 1:] if len(a_string) < MAX_INPUT_LEN and rand() < amount_of_noise * len(a_string): # Add a random character random_char_position = random_randint(len(a_string)) a_string = a_string[:random_char_position] + random_choice(CHARS[:-1]) + a_string[random_char_position:] if rand() < amount_of_noise * len(a_string): # Transpose 2 characters random_char_position = random_randint(len(a_string) - 1) a_string = (a_string[:random_char_position] + a_string[random_char_position+1] + a_string[random_char_position] + a_string[random_char_position + 2:]) return a_string