rudolfbyker · April 25, 2017 10:35 · Apr 25, 2017
diff --git a/split_wav.py b/split_wav.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+
+from scipy.io import wavfile
+import os
+import numpy as np
+import argparse
+from tqdm import tqdm
+
+# Utility functions
+
+def windows(signal, window_size, step_size):
+    if type(window_size) is not int:
+        raise AttributeError("Window size must be an integer.")
+    if type(step_size) is not int:
+        raise AttributeError("Step size must be an integer.")
+    for i_start in xrange(0, len(signal), step_size):
+        i_end = i_start + window_size
+        if i_end >= len(signal):
+            break
+        yield signal[i_start:i_end]
+
+def energy(samples):
+    return np.sum(np.power(samples, 2.)) / float(len(samples))
+
+def rising_edges(binary_signal):
+    previous_value = 0
+    index = 0
+    for x in binary_signal:
+        if x and not previous_value:
+            yield index
+        previous_value = x
+        index += 1
+
+# Process command line arguments
+
+parser = argparse.ArgumentParser(description='Split a WAV file at silence.')
+parser.add_argument('input_file', type=str, help='The WAV file to split.')
+parser.add_argument('--output-dir', '-o', type=str, default='.', help='The output folder. Defaults to the current folder.')
+parser.add_argument('--min-silence-length', '-m', type=float, default=3., help='The minimum length of silence at which a split may occur [seconds]. Defaults to 3 seconds.')
+parser.add_argument('--silence-threshold', '-t', type=float, default=1e-6, help='The energy level (between 0.0 and 1.0) below which the signal is regarded as silent. Defaults to 1e-6 == 0.0001%.')
+parser.add_argument('--step-duration', '-s', type=float, default=None, help='The amount of time to step forward in the input file after calculating energy. Smaller value = slower, but more accurate silence detection. Larger value = faster, but might miss some split opportunities. Defaults to (min-silence-length / 10.).')
+parser.add_argument('--dry-run', '-n', action='store_true', help='Don\'t actually write any output files.')
+
+args = parser.parse_args()
+
+input_filename = args.input_file
+window_duration = args.min_silence_length
+if args.step_duration is None:
+    step_duration = window_duration / 10.
+else:
+    step_duration = args.step_duration
+silence_threshold = args.silence_threshold
+output_dir = args.output_dir
+output_filename_prefix = os.path.splitext(os.path.basename(input_filename))[0]
+dry_run = args.dry_run
+
+print "Splitting {} where energy is below {}% for longer than {}s.".format(
+    input_filename,
+    silence_threshold * 100.,
+    window_duration
+)
+
+# Read and split the file
+
+sample_rate, samples = input_data=wavfile.read(filename=input_filename, mmap=True)
+
+max_amplitude = np.iinfo(samples.dtype).max
+max_energy = energy([max_amplitude])
+
+window_size = int(window_duration * sample_rate)
+step_size = int(step_duration * sample_rate)
+
+signal_windows = windows(
+    signal=samples,
+    window_size=window_size,
+    step_size=step_size
+)
+
+window_energy = (energy(w) / max_energy for w in tqdm(
+    signal_windows,
+    total=int(len(samples) / float(step_size))
+))
+
+window_silence = (e > silence_threshold for e in window_energy)
+
+cut_times = (r * step_duration for r in rising_edges(window_silence))
+
+# This is the step that takes long, since we force the generators to run.
+print "Finding silences..."
+cut_samples = [int(t * sample_rate) for t in cut_times]
+cut_samples.append(-1)
+
+cut_ranges = [(i, cut_samples[i], cut_samples[i+1]) for i in xrange(len(cut_samples) - 1)]
+
+for i, start, stop in tqdm(cut_ranges):
+    output_file_path = "{}_{:03d}.wav".format(
+        os.path.join(output_dir, output_filename_prefix),
+        i
+    )
+    if not dry_run:
+        print "Writing file {}".format(output_file_path)
+        wavfile.write(
+            filename=output_file_path,
+            rate=sample_rate,
+            data=samples[start:stop]
+        )
+    else:
+        print "Not writing file {}".format(output_file_path)