Created
          March 24, 2016 08:32 
        
      - 
      
- 
        Save MajorTal/67d54887a729b5e5aa85 to your computer and use it in GitHub Desktop. 
Revisions
- 
        # Gist created by MajorTal on Mar 24, 2016.
from numpy.random import choice as random_choice, randint as random_randint, rand

# Strings that already have this many characters never get a character added.
MAX_INPUT_LEN = 40
# Per-character noise rate: when passed as `amount_of_noise`, a string of
# MAX_INPUT_LEN characters triggers each kind of mistake with probability 0.2.
AMOUNT_OF_NOISE = 0.2 / MAX_INPUT_LEN
# Alphabet used for noise; note that the noise itself is drawn from
# CHARS[:-1], i.e. every entry except the trailing ".".
CHARS = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .")


def _random_noise_char():
    """Return one random character from CHARS, excluding its last entry."""
    # random_choice returns a NumPy string scalar; convert it so the result
    # of the concatenations below is always a plain ``str``.
    return str(random_choice(CHARS[:-1]))


def add_noise_to_string(a_string, amount_of_noise):
    """Add some artificial spelling mistakes to the string.

    Up to four independent edits are applied, in this order: replace one
    character, delete one character, insert one character, transpose two
    adjacent characters.  Each edit happens with probability
    ``amount_of_noise * len(a_string)`` (capped at 1 by the comparison with
    a uniform sample), where the length is re-read after every edit.

    Args:
        a_string: The text to corrupt.
        amount_of_noise: Per-character probability of each kind of mistake.

    Returns:
        The (possibly) modified string.  An empty input is returned
        unchanged because every edit probability is then zero.
    """
    if rand() < amount_of_noise * len(a_string):
        # Replace a character with a random character
        position = random_randint(len(a_string))
        a_string = (a_string[:position]
                    + _random_noise_char()
                    + a_string[position + 1:])
    if rand() < amount_of_noise * len(a_string):
        # Delete a character
        position = random_randint(len(a_string))
        a_string = a_string[:position] + a_string[position + 1:]
    if len(a_string) < MAX_INPUT_LEN and rand() < amount_of_noise * len(a_string):
        # Add a random character.  The position is drawn from [0, len), so
        # the new character is always inserted before an existing one.
        position = random_randint(len(a_string))
        a_string = (a_string[:position]
                    + _random_noise_char()
                    + a_string[position:])
    # rand() is drawn before the length check so the random stream is consumed
    # exactly as before; the length check avoids random_randint(0), which
    # raises ValueError when only one character is left to "transpose".
    if rand() < amount_of_noise * len(a_string) and len(a_string) > 1:
        # Transpose 2 characters
        position = random_randint(len(a_string) - 1)
        a_string = (a_string[:position]
                    + a_string[position + 1]
                    + a_string[position]
                    + a_string[position + 2:])
    return a_string