"""Estimate where the audible part of an MP3 file starts and ends.

Usage: pass the path of the MP3 file as the first command-line argument.
The script prints two tab-separated numbers: the offset detected in the
first 15 seconds and the offset detected (scanning backwards) in the last
15 seconds of the file.
"""
import sys

import numpy as np

try:
    from pydub import AudioSegment, silence
except ImportError:
    # Keeps detect_silence() importable on machines without pydub;
    # main() reports the missing dependency when decoding is requested.
    AudioSegment = silence = None

try:
    # Not referenced by the code below; kept so the name `plt` stays
    # available for ad-hoc debugging plots of the computed curves.
    import matplotlib.pyplot as plt
except ImportError:
    plt = None

# Frame rate used for every window-size and time conversion below.
# NOTE(review): the decoded file's real frame rate is never consulted --
# confirm all inputs are 44.1 kHz.
SAMPLING_RATE = 44100
# A window counts as "loud" when its standard deviation exceeds the
# smallest observed standard deviation by more than this amount.
THRESHOLD = 50
# Number of consecutive analysis windows that must be loud
# (an earlier tuning used 100).
CONDITION_INTERVAL = 30
# Amount skipped at each end of the file before analysis (multiplied by
# 1000 when slicing the pydub segment) and added back to every result.
OFFSET = 1


def main():
    """Decode the MP3 named in ``sys.argv[1]`` and print both offsets."""
    if len(sys.argv) < 2:
        sys.exit("usage: %s FILE.mp3" % sys.argv[0])
    if AudioSegment is None:
        raise ImportError("pydub is required to decode the input file")

    audio_segment = AudioSegment.from_mp3(sys.argv[1])
    # Only the first and the last 15 * 1000 slice units are analysed,
    # with OFFSET * 1000 units trimmed from the outer edge of each.
    start_segment = audio_segment[OFFSET * 1000:15 * 1000]
    end_segment = audio_segment[-15 * 1000:-1000 * OFFSET]

    start_offset = detect_silence(
        start_segment.get_array_of_samples(), threshold=50)
    end_offset = detect_silence(
        end_segment.get_array_of_samples(), threshold=10, reversed=True)
    # Parenthesised form works as a Python 2 statement and a Python 3 call.
    print("%lf\t%lf" % (start_offset, end_offset))


def detect_silence(segment, threshold, reversed=False):
    """Locate the first sustained loud stretch in a block of samples.

    The samples are reshaped into two columns and the column whose mean
    is closest to zero is analysed.  A standard deviation is computed
    every ``SAMPLING_RATE * 0.01`` frames over a window of
    ``SAMPLING_RATE * 0.05`` frames.  The result is the position of the
    first analysis point from which up to ``CONDITION_INTERVAL``
    consecutive points all exceed the minimum standard deviation by more
    than ``THRESHOLD``.

    Args:
        segment: flat sequence of samples; it is reshaped to
            ``(len // 2, 2)``, i.e. treated as interleaved two-channel
            data.
        threshold: accepted for interface compatibility.
            NOTE(review): this value is currently ignored -- the
            module-level ``THRESHOLD`` is what gets applied.  Confirm
            which one is intended before wiring it in.
        reversed: when true, windows look backwards from each analysis
            point and the scan runs from the end of the data towards the
            beginning, so the result is measured from the end.

    Returns:
        ``position_in_frames / SAMPLING_RATE + OFFSET`` as a float.  If
        no sustained loud stretch is found, the position is the number
        of analysis points (i.e. roughly the whole input).
    """
    samples = np.array(segment)
    # Floor division: reshape() rejects the float that "/" yields on
    # Python 3.
    channel_data = samples.reshape(len(samples) // 2, 2)

    window_step = SAMPLING_RATE * 0.01
    window_span = SAMPLING_RATE * 0.05
    sample_points = int(len(channel_data) / window_step)
    if sample_points == 0:
        # Less than one analysis step of audio: nothing to scan.
        return float(OFFSET)

    # Analyse the column whose mean sample value is closest to zero.
    index = np.argmin(np.abs(np.average(channel_data, axis=0)))
    channel = channel_data[:, index]

    stds = np.zeros(sample_points)
    for i in range(sample_points):
        position = i * window_step
        if reversed:
            # Clamp at 0: a negative start would be interpreted relative
            # to the end of the array and select the wrong frames.
            lo = max(0, int(position - window_span))
            hi = int(position)
        else:
            lo = int(position)
            hi = int(position + window_span)
        window = channel[lo:hi]
        if len(window) > 0:
            stds[i] = np.std(window)

    # Loop-invariant: classify every analysis point once.
    loud = (stds - np.min(stds)) > THRESHOLD

    sample_count = sample_points  # default when nothing qualifies
    for i in range(sample_points):
        if reversed:
            # Explicit bounds instead of stds[-(i + n):-i], which is
            # always empty for i == 0 because -0 == 0.
            stop = sample_points - i
            start = max(0, stop - CONDITION_INTERVAL)
        else:
            start = i
            stop = i + CONDITION_INTERVAL
        run = loud[start:stop]
        if len(run) > 0 and np.all(run):
            sample_count = i
            break

    return float(sample_count * window_step) / SAMPLING_RATE + OFFSET


if __name__ == '__main__':
    main()