Skip to content

Instantly share code, notes, and snippets.

@DarkPointer
Forked from LazieKat/reddit-video-parser.py
Last active September 19, 2019 16:51
Show Gist options
  • Select an option

  • Save DarkPointer/83794ab743a998c34a1eb38a756902ee to your computer and use it in GitHub Desktop.

Select an option

Save DarkPointer/83794ab743a998c34a1eb38a756902ee to your computer and use it in GitHub Desktop.
Parse, download, and merge Reddit video and audio
#!/usr/bin/python3
############ Imports
import re, requests, tempfile, argparse, ffmpy, os
import urllib.request
############ Parser function
def parseRedditVideo(link: str, outPath: str) -> None:
    """Download a reddit-hosted video and merge its video and audio streams.

    The post's JSON description is requested (the post URL with '.json'
    appended), the DASH manifest it points to is fetched, and the first and
    last <BaseURL> entries of that manifest are downloaded to temporary files.
    ffmpeg (through ffmpy) then copies both streams into ``outPath`` without
    re-encoding.

    Args:
        link: URL of the reddit post containing the video.
        outPath: Path of the merged output file handed to ffmpeg.

    Raises:
        SystemExit: With status 1, after printing an error message, when the
            post cannot be read, is not a video, exposes no streams, cannot
            be downloaded or merged, or the temporary files cannot be removed.
    """
    requestHeaders = {
        'User-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:69.0) Gecko/20100101 Firefox/69.0'
    }
    # Seconds to wait on each HTTP request so a stalled server cannot hang us
    requestTimeout = 30

    # Check the link to add the .json request
    jsonLink = link + ('.json' if link.endswith('/') else '/.json')

    # This would fail if the link is not actually from reddit
    try:
        response = requests.get(url=jsonLink, headers=requestHeaders, timeout=requestTimeout)
        postData = response.json()[0]['data']['children'][0]['data']
        isVideo = postData['is_video']
    except (requests.exceptions.RequestException, ValueError, KeyError, IndexError, TypeError):
        print('Error: Is the link correctly spelled?')
        raise SystemExit(1)

    if not isVideo:
        print('Error: The URL does not contain a video')
        raise SystemExit(1)

    # Parse the post for the audio and video info
    try:
        mpdListLink = postData['media']['reddit_video']['dash_url']
        # Base link contains everything
        baseLink = postData['url']
        mpdResponse = requests.get(url=mpdListLink, headers=requestHeaders, timeout=requestTimeout)
    except (requests.exceptions.RequestException, KeyError, TypeError):
        print('Error: Could not fetch the video stream list')
        raise SystemExit(1)

    # This returns something like ['720p', '360p', 'audio'] or whatever
    streamNames = re.findall(r'<BaseURL>(.*?)</BaseURL>', mpdResponse.text)
    if not streamNames:
        print('Error: No video streams found for this post')
        raise SystemExit(1)

    # Assumes the manifest lists the highest video quality first and the
    # audio track last -- TODO confirm against current reddit manifests
    highestVideoQualityFullLink = baseLink + '/' + streamNames[0]
    audioFullLink = baseLink + '/' + streamNames[-1]

    # mkstemp gives unique, platform-correct paths through the public API;
    # the descriptors are closed right away so other writers can open the files
    videoHandle, tempVideoFilepath = tempfile.mkstemp()
    audioHandle, tempAudioFilepath = tempfile.mkstemp()
    os.close(videoHandle)
    os.close(audioHandle)

    try:
        # Store the actual data in the temp directory
        try:
            urllib.request.urlretrieve(highestVideoQualityFullLink, tempVideoFilepath)
            urllib.request.urlretrieve(audioFullLink, tempAudioFilepath)
        except OSError:
            # urllib's URLError / HTTPError are OSError subclasses
            print('Error: Could not download the video or audio stream')
            raise SystemExit(1)

        # Combine stuff into an actual video
        ff = ffmpy.FFmpeg(
            inputs={tempVideoFilepath: None, tempAudioFilepath: None},
            outputs={outPath: " -c copy "}
        )
        # A failsafe
        try:
            ff.run()
        except ffmpy.FFRuntimeError:
            print('Error: video generation failed, does a file with the same name already exist?')
            raise SystemExit(1)
    finally:
        # Always clean up, even on failure, and try both files independently
        cleanupFailed = False
        for tempPath in (tempVideoFilepath, tempAudioFilepath):
            try:
                os.remove(tempPath)
            except OSError:
                cleanupFailed = True
        if cleanupFailed:
            print('Error: Could not delete temporary files, files still in use?')
            raise SystemExit(1)
############ Main
if __name__ == '__main__':
    # Command-line interface: -i is the post URL, -o the merged output file
    arguments = argparse.ArgumentParser(description='Reddit video downloader v1.0')
    arguments.add_argument('-i', type=str, metavar="'url'", default=None, help='Video URL')
    arguments.add_argument('-o', type=str, metavar="'path'", default='out.mp4', help='Output path')
    args = arguments.parse_args()
    # -i is checked by hand so the script keeps its own message and exit status
    if args.i is None:
        print('Error: No url to download')
        # SystemExit is raised directly; exit() is a site helper that may be absent
        raise SystemExit(1)
    parseRedditVideo(args.i, args.o)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment