# Build a word -> integer index from `sentences` and save it as JSON.

# Replace each of the punctuation characters . , : ? { } with a space.
# The pattern must be a character class: written bare, `.` is a wildcard and
# `?` a quantifier, so the punctuation would not actually be stripped.
sentences = [re.sub(r'[.,:?{}]', ' ', sentence) for sentence in sentences]

# Join all cleaned sentences into one whitespace-separated string.
corpus = " ".join(sentences)

# Unique whitespace-delimited tokens of the corpus.
# NOTE(review): the original split `doc`, which is not defined in the visible
# code, while `corpus` was otherwise unused -- assumed to be a stale name.
words = set(corpus.split())

# Sort before enumerating so every word gets the same index on every run;
# iteration order of a set of strings varies between interpreter runs.
word_index = {word: index for index, word in enumerate(sorted(words))}

# Persist the mapping next to the script's working directory.
with open('word_index.json', 'w', encoding='utf-8') as file:
    json.dump(word_index, file)