-
-
Save DarkPointer/83794ab743a998c34a1eb38a756902ee to your computer and use it in GitHub Desktop.
Parse, download, and merge Reddit video and audio
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/python3 | |
| ############ Imports | |
| import re, requests, tempfile, argparse, ffmpy, os | |
| import urllib.request | |
| ############ Parser function | |
def parseRedditVideo(link: str, outPath: str) -> None:
    """Download a Reddit-hosted video and merge its video and audio streams.

    The post's ``.json`` listing is fetched, the DASH manifest it references
    is read, and the first and last ``<BaseURL>`` entries of that manifest
    are downloaded to temporary files (treated as the video and audio
    streams respectively). ffmpeg (through ffmpy) then stream-copies both
    into ``outPath``. The temporary files are removed afterwards, including
    when a download or the merge fails.

    Args:
        link: URL of the Reddit post. A query string or fragment, if
            present, is dropped before ``.json`` is appended.
        outPath: Path of the merged output file.

    Raises:
        SystemExit: With status 1, after printing an error message, when the
            post cannot be fetched or parsed, is not a video, has no usable
            manifest, ffmpeg fails, or the temporary files cannot be removed.
    """
    requestHeaders = {
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0'
    }
    requestTimeout = 30  # seconds; without it a stalled connection hangs forever

    # Drop any '#fragment' / '?query' so the '.json' suffix lands on the path.
    link = link.split('#', 1)[0].split('?', 1)[0]
    # Check the link to add the .json request
    if link.endswith('/'):
        link += '.json'
    else:
        link += '/.json'

    # This would fail if the link is not actually from reddit
    try:
        response = requests.get(url=link, headers=requestHeaders, timeout=requestTimeout)
        response.raise_for_status()
        postData = response.json()[0]['data']['children'][0]['data']
        isVideo = postData['is_video']
    except (requests.RequestException, ValueError, KeyError, IndexError, TypeError):
        print('Error: Is the link correctly spelled?')
        raise SystemExit(1) from None
    if not isVideo:
        print('Error: The URL does not contain a video')
        raise SystemExit(1)

    # Parse the link for the audio and video info
    try:
        mpdListLink = postData['media']['reddit_video']['dash_url']
        # Base link contains everything
        baseLink = postData['url']
        mpdResponse = requests.get(url=mpdListLink, headers=requestHeaders, timeout=requestTimeout)
        mpdResponse.raise_for_status()
    except (requests.RequestException, KeyError, TypeError):
        print('Error: Could not retrieve the video manifest')
        raise SystemExit(1) from None
    mpdXMLData = mpdResponse.text

    # This returns something like ['720p', '360p', 'audio'] or whatever
    reSearchData = re.findall(r'<BaseURL>(.*?)</BaseURL>', mpdXMLData)
    if not reSearchData:
        print('Error: No media streams found in the video manifest')
        raise SystemExit(1)
    # NOTE(review): assumes the manifest lists the best video first and the
    # audio track last -- confirm against current Reddit manifests.
    highestVideoQualityPartialLink = reSearchData[0]
    audioPartialLink = reSearchData[-1]
    baseLink = baseLink.rstrip('/')  # avoid a doubled '/' when joining below
    highestVideoQualityFullLink = baseLink + '/' + highestVideoQualityPartialLink
    audioFullLink = baseLink + '/' + audioPartialLink

    tempPaths = []
    try:
        # mkstemp gives unique, platform-correct paths through the public API;
        # the descriptors are closed at once so the files can be rewritten.
        for _ in range(2):
            tempFd, tempPath = tempfile.mkstemp()
            os.close(tempFd)
            tempPaths.append(tempPath)
        tempVideoFilepath, tempAudioFilepath = tempPaths

        # Store the actual data in the temp directory
        urllib.request.urlretrieve(highestVideoQualityFullLink, tempVideoFilepath)
        urllib.request.urlretrieve(audioFullLink, tempAudioFilepath)

        # Combine stuff into an actual video
        ff = ffmpy.FFmpeg(
            inputs={tempVideoFilepath: None, tempAudioFilepath: None},
            outputs={outPath: " -c copy "}
        )
        # A failsafe
        try:
            ff.run()
        except ffmpy.FFRuntimeError:
            print('Error: video generation failed, does a file with the same name already exist?')
            raise SystemExit(1) from None
    finally:
        # Always attempt cleanup so failed runs do not leave files behind.
        tempFilesRemoved = True
        for tempPath in tempPaths:
            try:
                os.remove(tempPath)
            except OSError:
                tempFilesRemoved = False

    # Reached only when nothing above raised; report a cleanup failure then.
    if not tempFilesRemoved:
        print('Error: Could not delete temporary files, files still in use?')
        raise SystemExit(1)
| ############ Main | |
if __name__ == '__main__':
    # Command-line entry point: -i is the post URL, -o the merged output path.
    arguments = argparse.ArgumentParser(description='Reddit video downloader v1.0')
    arguments.add_argument('-i', type=str, metavar="'url'", default=None, help='Video URL')
    arguments.add_argument('-o', type=str, metavar="'path'", default='out.mp4', help='Output path')
    args = arguments.parse_args()
    # Truthiness covers both a missing -i (None) and an empty -i '' value.
    if not args.i:
        print('Error: No url to download')
        # SystemExit directly: the exit() builtin comes from the site module
        # and is not guaranteed to exist.
        raise SystemExit(1)
    parseRedditVideo(args.i, args.o)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment