# To run: python corpusShuffler -src sourceCorpus.txt -tgt targetCorpus.txt
"""Shuffle a parallel (source/target) corpus while keeping line alignment.

Both input files are read fully into memory, reordered with one shared
permutation, and written next to the inputs as ``<path>_shuffled``.
"""

import argparse
import random
from typing import List, Optional, Sequence, Tuple

# Fixed seed so that repeated runs produce the same ordering.
SEED = 7
# Appended to each input path to build the corresponding output path.
OUTPUT_SUFFIX = '_shuffled'


def shuffle_aligned(
    src_lines: Sequence[str],
    tgt_lines: Sequence[str],
    seed: int = SEED,
) -> Tuple[List[str], List[str]]:
    """Return copies of both sequences reordered by the same permutation.

    Args:
        src_lines: Lines of the source-language corpus.
        tgt_lines: Lines of the target-language corpus.
        seed: Seed for the permutation.

    Returns:
        A ``(shuffled_src, shuffled_tgt)`` tuple; element ``i`` of both lists
        comes from the same original line number.

    Raises:
        ValueError: If the two sequences differ in length, since no single
            permutation could then preserve the alignment.
    """
    if len(src_lines) != len(tgt_lines):
        raise ValueError(
            'source and target corpora have different line counts '
            '({} vs {}); cannot keep them aligned'.format(
                len(src_lines), len(tgt_lines)))

    # Shuffle the indices once and apply them to both sides. A private
    # Random instance leaves the global generator untouched while yielding
    # the same order as random.seed(seed) followed by random.shuffle(lines).
    order = list(range(len(src_lines)))
    random.Random(seed).shuffle(order)
    return [src_lines[i] for i in order], [tgt_lines[i] for i in order]


def _read_lines(path: str, encoding: Optional[str] = None) -> List[str]:
    """Read all lines of ``path``, making sure the last one ends in a newline."""
    with open(path, 'r', encoding=encoding) as handle:
        lines = handle.readlines()
    # Only the final line can lack a terminator; without one it would be
    # glued to whichever line follows it after shuffling.
    if lines and not lines[-1].endswith('\n'):
        lines[-1] += '\n'
    return lines


def _write_lines(
    path: str, lines: Sequence[str], encoding: Optional[str] = None
) -> None:
    """Write ``lines`` to ``path``, replacing any existing file."""
    with open(path, 'w', encoding=encoding) as handle:
        handle.writelines(lines)


def shuffle_corpus_files(
    src_path: str,
    tgt_path: str,
    seed: int = SEED,
    encoding: Optional[str] = None,
) -> Tuple[str, str]:
    """Shuffle two line-aligned files and write ``<path>_shuffled`` copies.

    Args:
        src_path: Path of the source-language corpus.
        tgt_path: Path of the target-language corpus.
        seed: Seed for the shared permutation.
        encoding: Text encoding for reading and writing; ``None`` keeps the
            platform default used by ``open``.

    Returns:
        The paths of the two files that were written.

    Raises:
        ValueError: If the files do not have the same number of lines.
        OSError: If a file cannot be read or written.
    """
    src_lines = _read_lines(src_path, encoding)
    tgt_lines = _read_lines(tgt_path, encoding)

    # Validate and shuffle before opening any output, so a length mismatch
    # does not leave truncated or half-written result files behind.
    src_shuffled, tgt_shuffled = shuffle_aligned(src_lines, tgt_lines, seed)

    src_out = src_path + OUTPUT_SUFFIX
    tgt_out = tgt_path + OUTPUT_SUFFIX
    _write_lines(src_out, src_shuffled, encoding)
    _write_lines(tgt_out, tgt_shuffled, encoding)
    return src_out, tgt_out


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Parse ``-src``/``-tgt`` from ``argv`` (default: ``sys.argv``) and shuffle."""
    parser = argparse.ArgumentParser(description='## CORPUS SHUFLER ##')
    parser.add_argument(
        '-src', help='sorce language corpus to shuffle', required=True)
    parser.add_argument(
        '-tgt', help='target language corpus to shuffle', required=True)
    args = parser.parse_args(argv)

    try:
        shuffle_corpus_files(args.src, args.tgt)
    except ValueError as err:
        # Report misaligned inputs as a usage error instead of a traceback.
        parser.error(str(err))


if __name__ == '__main__':
    main()