Split audio file with ffmpeg based on chapter metadata
#!/usr/bin/env python3
import sys
import os
import re
import subprocess as sub
import argparse
import tempfile
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
from multiprocessing import cpu_count
# split_ffmpeg.py
#
# Split an audio file into multiple files, using ffmpeg, with no loss in quality.
#
# Uses chapter metadata to decide at which timestamps to split the file. Obviously this script
# will only be able to split files with such metadata included. Chapter metadata should be
# visible in the 'ffprobe <file>' output. If not, this script will be useless. Example metadata
# is of the form:
#
#   Chapter #0:0: start 0.000000, end 1079.000000
#     Metadata:
#       title           : Chapter One
#   Chapter #0:1: start 1079.000000, end 2040.000000
#     Metadata:
#       title           : Chapter Two
#   Chapter #0:2: start 2040.000000, end 2878.000000
#     Metadata:
#       title           : Chapter Three
#   Chapter #0:3: start 2878.000000, end 3506.000000
#     Metadata:
#       title           : Chapter Four
#   Chapter #0:4: start 3506.000000, end 4696.000000
#     Metadata:
#       title           : Chapter Five
#   Chapter #0:5: start 4696.000000, end 5741.000000
#     Metadata:
#       title           : Chapter Six
#   Chapter #0:6: start 5741.000000, end 7131.000000
#   ...
#
# By default, the chapter files are written into a temporary directory under /tmp.
# You may specify an alternative output directory with '--outdir <path>', which will be created
# if it does not exist. Note that this script will never overwrite files, so you must delete
# conflicting files manually (or specify some other empty/nonexistent directory).
#
# The input file basename is used to name the output files. You can change this behaviour with
# the flag '--use-title', in which case the chapter titles, if available in the chapter
# metadata, are used in the filenames instead (this is not useful if your metadata is crappy,
# for example). See the example invocation below.
#
# The work is done in parallel with the help of a thread pool. You may specify how many
# parallel jobs you want with the command line parameter '--concurrency'. The default
# concurrency is equal to the number of cores available (although I think this might be silly,
# since this kind of processing isn't so much CPU-bound as it is IO-bound).
#
# Dependencies:
#
# - Python 3.5 or newer
# - Obviously you need ffmpeg (and ffprobe) installed. Otherwise the Python 3 stdlib should suffice.
#
# Author: Markus H (MawKKe) [email protected]
# Date: 2018-07
#
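# Example invocation (a minimal sketch; 'audiobook.m4b' and './chapters' are placeholder
# names, substitute your own):
#
#   python3 split_ffmpeg.py --infile audiobook.m4b --outdir ./chapters --use-title --concurrency 4
#
# With '--use-title' and chapter titles like those shown above, this produces files such as
# './chapters/Chapter One - chapter 1.m4b', './chapters/Chapter Two - chapter 2.m4b', and so on.
# Without '--use-title' the input basename is used instead: './chapters/audiobook - chapter 1.m4b'.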
def parseChapters(filename):
    command = ["ffprobe", "-i", filename, "-v", "error", "-print_format", "json", "-show_chapters"]
    try:
        # ffmpeg & ffprobe write output into stderr, except when
        # using -show_XXXX and -print_format. Strange.
        p = sub.run(command, stdout=sub.PIPE, stderr=sub.PIPE)
        # had we run ffmpeg instead of ffprobe, this would throw, since ffmpeg without
        # an output file will exit with exitcode != 0
        p.check_returncode()
        # .decode() will most likely explode if the ffprobe json output (chapter metadata)
        # was written with some weird encoding, and even more so if the data contains text in
        # multiple different text encodings...
        # TODO?
        # https://stackoverflow.com/questions/10009753/python-dealing-with-mixed-encoding-files
        output = p.stdout.decode('utf8')
        d = json.loads(output)
        return d
    except sub.CalledProcessError as e:
        print("ERROR: ", e)
        print("FFPROBE-STDOUT: ", p.stdout)
        print("FFPROBE-STDERR: ", p.stderr)
        return None
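# For reference, the dict returned by parseChapters() is roughly of the following shape
# (an illustrative sketch of ffprobe's JSON output; exact fields may vary between ffprobe
# versions). Note that 'start'/'end' are integers in 'time_base' units, while
# 'start_time'/'end_time' are strings expressed in seconds:
#
#   {"chapters": [{"id": 0,
#                  "time_base": "1/1000",
#                  "start": 0,       "start_time": "0.000000",
#                  "end": 1079000,   "end_time": "1079.000000",
#                  "tags": {"title": "Chapter One"}},
#                 ...]}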
def main(argv):
    p = argparse.ArgumentParser()
    p.add_argument("--infile", required=True, help="Input file")
    p.add_argument("--concurrency", required=False, default=cpu_count(), type=int,
                   help="Number of concurrent processes")
    p.add_argument("--use-title", required=False, dest='use_title', action='store_true',
                   help="Include chapter title in the filenames")
    p.add_argument("--outdir", required=False,
                   help="Output directory. If omitted, files are written into a new /tmp/ffmpeg-split-XXX directory.")

    args = p.parse_args(argv[1:])

    fbase, fext = os.path.splitext(os.path.basename(args.infile))

    if 0 in [len(fbase), len(fext)]:
        print("Something is wrong, basename or file extension is empty")
        return -1

    if fext.startswith("."):
        fext = fext[1:]

    info = parseChapters(args.infile)

    if info is None or info.get("chapters", None) is None or len(info["chapters"]) == 0:
        print("Could not parse chapters, exiting...")
        return -1

    if args.outdir:
        os.makedirs(args.outdir, exist_ok=True)
        outdir = args.outdir
    else:
        outdir = tempfile.mkdtemp(prefix="ffmpeg-split-")

    print("Output directory:", outdir)

    def validate_chapter(ch):
        start = ch['start']
        end = ch['end']
        if (end - start) <= 0:
            print("WARNING: chapter {0} duration is zero or negative (start: {1}, end: {2}), skipping..."
                  .format(ch['id'], start, end))
            return None
        return ch

    chapters = list(filter(None, (validate_chapter(ch) for ch in info["chapters"])))

    def outf(n, tags):
        fmt = "{0}/{1} - chapter {2}.{3}"
        if args.use_title and tags and tags.get("title", False):
            return fmt.format(outdir, tags["title"], n, fext)
        return fmt.format(outdir, fbase, n, fext)

    # WIS = Work ItemS, a list of 4-tuples: (infile, start, end, outfile)
    WIS = [(args.infile, ch["start_time"], ch["end_time"], outf(ch["id"] + 1, ch.get("tags", None)))
           for ch in chapters]
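    # For illustration (assuming an input file 'audiobook.m4b' and no --use-title), a single
    # work item might look like:
    #   ('audiobook.m4b', '0.000000', '1079.000000', '/tmp/ffmpeg-split-abc123/audiobook - chapter 1.m4b')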
| print("Total: {0} chapters, concurrency: {1}".format(len(WIS), args.concurrency)) | |
| errors = 0 | |
| with ThreadPoolExecutor(max_workers=args.concurrency) as pool: | |
| def start_all(): | |
| for wi in WIS: | |
| print("Submitting:", wi) | |
| yield pool.submit(ffmpeg_split, wi), wi | |
| futs = dict(start_all()) | |
| for fut in as_completed(futs): | |
| try: | |
| res = fut.result() | |
| # this looks nasty as hell, but hey, this is what they do in python docs.. | |
| except Exception as e: | |
| print("Hmmm... general exeption:", e) | |
| else: | |
| item = res["item"] | |
| if res['ok']: | |
| # CompletedProcess and has 'args' instead of 'cmd', because...? | |
| print("SUCCESS: {0}".format(res["outfile"])) | |
| else: | |
| errors += 1 | |
| print("FAILURE: {0}".format(res["outfile"])) | |
| print("Command: {0}".format(item.cmd)) | |
| print("FFMPEG-STDOUT:", item.stdout) | |
| print("FFMPEG-STDERR:", item.stderr) | |
| print("-" * 20) | |
| print() | |
| if errors > 0: | |
| n = len(chapters) | |
| print("WARNING: there were errors, {0} out of {1} chapters were processed correctly".format(n-errors, n)) | |
| else: | |
| print("All valid chapters were successfully processed") | |
| print("Output directory:", outdir) | |
| return 0 | |
def ffmpeg_split(wi):
    infile, start, end, outfile = wi

    # NOTE:
    # The '-nostdin' param should prevent your terminal from becoming all messed up during the
    # pool processing. But if it does, you can fix it with 'reset' and/or 'stty sane'.
    # If corruption still occurs, let me know (email is at the top of the file).
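    # For a single chapter, the command built below is roughly equivalent to running
    # (placeholder file names and timestamps):
    #
    #   ffmpeg -nostdin -i audiobook.m4b -v error -map_chapters -1 -vn -c copy \
    #       -ss 0.000000 -to 1079.000000 -n '/tmp/ffmpeg-split-abc123/audiobook - chapter 1.m4b'
    #
    # i.e. the audio stream is copied as-is (no re-encoding, no video), chapter metadata is
    # dropped from the output, and existing output files are never overwritten ('-n').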
    cmd = [
        "ffmpeg",
        "-nostdin",
        "-i", infile,
        "-v", "error",
        "-map_chapters", "-1",
        "-vn",
        "-c", "copy",
        "-ss", start,
        "-to", end,
        "-n",
        outfile,
    ]

    s = sub.run(cmd, stdout=sub.PIPE, stderr=sub.PIPE)
    try:
        s.check_returncode()
        return {'ok': True, 'outfile': outfile, 'item': s}
    except sub.CalledProcessError as e:
        return {'ok': False, 'outfile': outfile, 'item': e}


if __name__ == '__main__':
    sys.exit(main(sys.argv))
  