Skip to content

Instantly share code, notes, and snippets.

@Iamgoofball
Created April 17, 2020 03:58
Show Gist options
  • Save Iamgoofball/a1a5d54bc2dbcdcef83335607d4d3cd6 to your computer and use it in GitHub Desktop.
Save Iamgoofball/a1a5d54bc2dbcdcef83335607d4d3cd6 to your computer and use it in GitHub Desktop.
/mlp/ datasets -> Tacotron standard format. Make sure to pip install pysoundfile and numpy; pprint ships with the Python standard library and needs no install.
import io
import glob, os
import pprint
import soundfile
def main():
    """Convert every ``.flac`` below the current directory to a numbered WAV.

    Walks the current working directory recursively. Each ``.flac`` file is
    re-encoded as 16-bit PCM into ``processed_audio/line_<n>.wav`` and, when
    a non-empty transcription is found in the sibling ``<name>.txt`` file, a
    ``path|transcription`` line is appended to ``compiled_dataset.txt``.

    Files with a missing or empty transcription are still converted (and
    still consume a line number), but get no entry in the dataset file.
    """
    # exist_ok: a second run must not abort just because the folder is there.
    os.makedirs("processed_audio", exist_ok=True)
    line_count = 0
    # Append mode is kept from the original: existing entries are preserved.
    with open('compiled_dataset.txt', 'a') as file_object:
        for filename in glob.iglob('./**', recursive=True):
            stem, extension = os.path.splitext(filename)
            # Skip directories and anything that is not a FLAC recording.
            if not (os.path.isfile(filename) and extension == ".flac"):
                continue
            line_count += 1
            print(stem)
            print(extension)
            print(os.path.basename(filename))
            try:
                with open(stem + ".txt") as transcription:
                    # Strip so a trailing newline cannot split the dataset
                    # entry and whitespace-only text counts as "missing".
                    voiceline_transcription = transcription.read().strip()
            except FileNotFoundError:
                # No sibling .txt: handled like an empty transcription.
                voiceline_transcription = ""
            print(voiceline_transcription)
            data, samplerate = soundfile.read(filename)
            # Write at the rate the samples were read with; soundfile.write
            # does not resample, so a hard-coded rate would mislabel the
            # audio whenever the source is not already at that rate.
            soundfile.write(
                "./processed_audio/line_" + str(line_count) + ".wav",
                data,
                samplerate,
                subtype="PCM_16",
            )
            if voiceline_transcription:
                # NOTE(review): the listed path prefix ("wavs/out/") differs
                # from the folder written above ("processed_audio/") --
                # presumably the files are moved before training; confirm.
                file_object.write(
                    "wavs/out/line_" + str(line_count) + ".wav"
                    + "|" + voiceline_transcription + "\n"
                )
            else:
                print("No transcription made for this file!")
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment