un1tz3r0 · August 23, 2025 18:06 · Dec 22, 2020 · Dec 22, 2020
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +0,0 @@
-ytmusicapi
-pytube
-blessings

diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,3 @@
+ytmusicapi
+pytube
+blessings
diff --git a/youtubemusicdownloader.py b/youtubemusicdownloader.py
@@ -0,0 +1,259 @@
+''' This script uses ytmusicapi and pytube together to download your playlists, history or 'liked' songs as 
+high-quality audio-only streams from Youtube Music, which are protected by a "signatureCipher" obfuscation scheme.
+
+To use it, first install [ytmusicapi] and [pytube] using pip, then follow the instructions for creating the auth
+file from the response in an authenticated session to a watch-page request as found in your browser's dev-tools.
+
+The downloaded files are placed in ~/Music, named with the artist and track metadata, and will be skipped instead 
+of downloaded again next time it is run, based on the videoIds of the downloaded songs.
+
+Merry Xmas - V.
+'''
+
+import ytmusicapi
+import pytube
+import re, os, sys
+import requests
+from urllib.parse import parse_qs, urljoin
+import json
+
+try:
+	import blessings
+	clear_eol = blessings.Terminal().clear_eol
+except ImportError:
+	clear_eol = "\x1b[K" # blessings is optional: fall back to the raw ANSI "erase to end of line" sequence
+
+
+class DownloaderMixin:
+
+		''' Mixin for the ytmusicapi.YTMusic class that uses parts of pytube to add high-quality streaming
+		and batch track and playlist downloading to it. Some examples of usage are given at the
+		end of the module after the rest of the class definition. '''
+
+		def get_streaming_data_decrypted(self, videoId: str) -> tuple:
+				''' Fetch the streaming data for videoId and decode any signatureCipher-obfuscated stream URLs.
+
+				This is based on the YTMusic.get_streaming_data() method but it makes use of pytube to
+				decode the signatureCipher obfuscation that "protects" the higher quality adaptiveFormat
+				stream URLs from being enjoyed by "bots". Robots deserve access to the same high-fidelity
+				listening experience that we humans take for granted every time we leave auto-play going
+				on the tv and then pass out sitting up on the couch, phone in hand, shoes still on, sleep-
+				shopping on Amazon.
+
+				Returns a tuple (sdata, allformats): sdata is the 'streamingData' dict from the player
+				response and allformats is the list of its 'formats' / 'adaptiveFormats' entries that
+				have a 'url' key, each deciphered entry having had its 'url' key set in place.
+
+				Raises Exception if the /get_video_info? response lacks a 'player_response' key or the
+				player response lacks a 'streamingData' key. '''
+
+				# fetch /get_video_info? which should have a watch URL in there somewhere...
+				endpoint = "https://www.youtube.com/get_video_info"
+				params = {"video_id": videoId, "hl": self.language, "el": "detailpage",
+									"c": "WEB_REMIX", "cver": "0.1"}
+				response = requests.get(endpoint, params, headers=self.headers, proxies=self.proxies)
+				text = parse_qs(response.text)
+				if 'player_response' not in text:
+						raise Exception('This video is not playable (no player_response key in /get_video_info? response)')
+
+				player_response = json.loads(text['player_response'][0])
+				if 'streamingData' not in player_response:
+						raise Exception('This video is not playable (no streamingData key in player_response key of /get_video_info? response)')
+
+				# this seems like it will probably break easily... maybe fall back to a recursive search for a watch url anywhere in the JSON? or something?
+				watch_url = player_response['microformat']['microformatDataRenderer']['urlCanonical']
+
+				# get the watch page's HTML, which we need to get the base.js URL that determines how
+				# pytube unscrambles the signatureCipher
+				watch_response = requests.get(watch_url, headers=self.headers, proxies=self.proxies)
+				watch_html = watch_response.text
+
+				# this is where pytube comes in... given the watch page HTML, it extracts for us the URL of
+				# the base.js for the video player, which is where the signatureCipher is descrambled by a
+				# variable algorithm coded in minified, obfuscated javascript. thankfully, the task of
+				# extracting from the javascript the steps needed to properly unscramble the signatureCipher
+				# is also handled by pytube.
+				player_js_url = pytube.extract.get_ytplayer_js(watch_html)
+				player_js_response = requests.get(urljoin(watch_url, player_js_url), params, headers=self.headers, proxies=self.proxies)
+				player_js = player_js_response.text
+				cipher = pytube.cipher.Cipher(js = player_js)
+
+				# okay, now we collect all the streams available and apply the cipher to any that have signed
+				# URLs. this is where we would also handle DASH manifests... i think? TODO, fo' sho'.
+				allformats = []
+				sdata = player_response['streamingData']
+				for formatsKey in ['formats', 'adaptiveFormats']:
+					for fmt in sdata.get(formatsKey, []):
+						if 'signatureCipher' in fmt:
+							# 's' is the scrambled signature, 'url' the base URL, 'sp' the signature's query parameter name
+							fmtsigcipherq = parse_qs(fmt['signatureCipher'])
+							sig = cipher.get_signature(fmtsigcipherq['s'][0])
+							fmt['url'] = fmtsigcipherq['url'][0] + '&' + fmtsigcipherq['sp'][0] + '=' + sig
+						if 'url' not in fmt:
+							print(f"[warn] streamingData contains format with itag {fmt['itag']} without a url key in get_streaming_data_decrypted({repr(videoId)}):\n\n{repr(fmt)}\n")
+							continue
+						allformats.append(fmt)
+
+				return (sdata, allformats)
+
+		def download_song(self, video_id: str, dest_dir: str, chunk_size: int = 1024*1024, overwrite: bool = False, keep_incomplete: bool = False):
+				''' Download the best available audio stream of video_id into the directory dest_dir.
+
+				The file is named "<artists> - <title> [<videoId>].<ext>" (parts missing from the get_song()
+				metadata are left out), where <ext> is the sub-type of the chosen stream's mimeType.
+
+				chunk_size is the number of bytes requested per chunk of the response body. If overwrite is
+				False and the target file already exists, nothing is downloaded. If keep_incomplete is False,
+				a partially written file is removed when the download is interrupted by an exception.
+
+				Returns True once the file has been written completely, or False if it was skipped because
+				it already exists. Raises RuntimeError if no stream format could be selected, and
+				requests.HTTPError if the stream request returns an HTTP error status. '''
+
+				song = self.get_song(video_id)
+				song_id = song.get('videoId', video_id)
+				# de-duplicate the non-empty artist names in order: set() has no stable order, so the filename could differ between runs
+				artists = ", ".join(dict.fromkeys(artist for artist in song.get('artists', []) if len(artist) > 0))
+				title = song.get('title')
+				if title is not None and artists:
+						filename = artists + " - " + title + " [" + song_id + "]"
+				elif title is not None:
+						filename = title + " [" + song_id + "]"
+				else:
+						filename = f"[{song_id}]"
+
+				# maybe also download the thumbnail and possibly even set some tags (once we are finished downloading i guess...)
+				def sanitize(s): # drop every character that is not in the whitelist below
+						return re.sub("[^a-zA-Z 0-9_\\(\\)\\[\\]\\:\\'\\\"\\@\\!\\#\\$\\%\\&\\=\\+\\,\\.\\<\\>\\;\\|\\{\\}-]", "", s).strip()
+
+				# pick from available streams one that is audio-only with the highest average bitrate, hence highest objective quality
+				try:
+					_, fmts = self.get_streaming_data_decrypted(song_id)
+					audioonlyformats = [fmt for fmt in fmts if fmt['mimeType'].startswith('audio')]
+					# fall back to every format if none is audio-only; max(key=...) never compares the dicts, so equal bitrates are fine
+					bestfmt = max(audioonlyformats or fmts, key=lambda fmt: fmt['averageBitrate'])
+				except Exception as err:
+					raise RuntimeError(f"Error selecting suitable streaming format: {err}") from err
+
+				fileext = bestfmt['mimeType'].split("/")[1].split(";")[0] # use sub-type from mimetype as file extension
+				fullfilename = os.path.join(dest_dir, sanitize(filename) + "." + fileext)
+
+				if os.path.exists(fullfilename) and not overwrite:
+						print(f"Not downloading videoId {repr(song_id)}, would overwrite file {repr(fullfilename)}...")
+						return False
+
+				print(f"Downloading videoId {repr(song_id)} to file {repr(fullfilename)}...")
+				response = requests.get(bestfmt['url'], stream=True, headers=self.headers, proxies=self.proxies)
+				response.raise_for_status() # do not save an HTTP error page as if it were the audio stream
+				started = False
+				wrotebytes = 0
+				complete = False
+				try:
+						with open(fullfilename, "wb") as fout:
+								started = True
+								for chunk in response.iter_content(chunk_size=chunk_size):
+										fout.write(chunk)
+										wrotebytes = wrotebytes + len(chunk)
+										print(f"Downloaded {wrotebytes//1024} kbytes...{clear_eol}", end="\r", flush=True) # "\r" redraws the line in place
+								complete = True
+								print(f"Downloaded {wrotebytes//1024} kbytes.{clear_eol}", flush=True)
+				finally:
+						if started and not complete and not keep_incomplete:
+								print(f"Cleaning up partially downloaded file {repr(fullfilename)}...")
+								os.remove(fullfilename)
+				return True
+
+		def download_playlist(self, playlist, dest_dir = "~/Music", limit_duration = 25*60, no_uploaded = True):
+				''' Download every song of a playlist into dest_dir (which is created if it does not exist).
+
+				playlist may be specified in a few ways:
+
+				 1. playlist id (a string, which is passed to get_playlist())
+				 2. return value of get_playlist() etc. (dict containing 'tracks' key with a list of dicts with 'videoId' keys)
+				 3. list of dicts with videoId's
+				 4. list of videoId strings
+
+				Songs whose videoId already appears in brackets in a file name in dest_dir are skipped, and
+				so are songs with a 'duration' of limit_duration seconds or more. no_uploaded is accepted
+				but currently has no effect. Raises KeyError for an item that has no videoId. '''
+
+				dest_dir = os.path.expanduser(dest_dir)
+				os.makedirs(dest_dir, exist_ok=True)
+
+				playlist_items = playlist
+				if isinstance(playlist_items, (str, bytes)):
+					# if playlist is a string, assume it is a playlist id and download the playlist
+					playlist_items = self.get_playlist(playlist_items)
+				if hasattr(playlist_items, 'keys') and 'tracks' in playlist_items.keys():
+					# dict-like with a 'tracks' key (this includes what get_playlist() just returned): use its track list
+					playlist_items = playlist_items['tracks']
+
+				def parseDuration(s):
+						''' convert "ss", "mm:ss" or "hh:mm:ss" (only the last three fields count) to seconds '''
+						seconds = 0
+						for field in s.split(":")[-3:]:
+								seconds = seconds * 60 + int(field)
+						return seconds
+
+				def check_video_id(video_id, dest_dir):
+						''' determine if there is already a file in the destination directory with a specific
+						videoId in brackets in the name, in which case we will skip downloading it again '''
+						return any('['+video_id+']' in fn for fn in os.listdir(dest_dir))
+
+				for listitem in list(playlist_items):
+						if isinstance(listitem, str):
+								listitem = {'videoId': listitem} # a bare videoId string (case 4 above)
+						if 'videoId' not in listitem:
+								raise KeyError("item in playlist_items does not have a videoId!")
+						video_id = listitem['videoId']
+						title, duration = listitem.get('title'), listitem.get('duration')
+
+						# a missing or empty duration is treated as unknown, and the song is downloaded
+						if check_video_id(video_id, dest_dir):
+								print(f"Skipping videoId {video_id} - {repr(title)} because a file ending with the same id exists.")
+						elif duration and parseDuration(duration) >= limit_duration:
+								print(f"Skipping videoId {video_id} - {repr(title)} because it is not shorter than {limit_duration} seconds.")
+						else:
+								try:
+										self.download_song(video_id, dest_dir)
+								except Exception as err:
+										print(f"Exception caught while trying to download videoId {video_id} - {repr(title)}:  {err}")
+
+
+# Add the mixin to ytmusicapi class, creating our very own frankentype
+
+class YTMusic(ytmusicapi.YTMusic, DownloaderMixin):
+		''' ytmusicapi.YTMusic client extended with the download methods of DownloaderMixin. '''
+
+# A simple example you can run from the cli:
+
+if __name__ == "__main__":
+	usage = '''Missing file "headers_auth.json"... see ytmusicapi.readthedocs.org for explanation of how to use an 
+	authenticated watch page request in a signed-in browser and the browser devtools to set up headers_auth.json for
+	ytmusicapi '''
+
+	if not os.path.exists("headers_auth.json"):
+		print(usage)
+		sys.exit(1) # a bare `exit` is only a name lookup and would let the script carry on without the auth file
+
+	ytm=YTMusic("headers_auth.json")
+
+	# EXAMPLE - download the songs in your playback history
+
+	ytm.download_playlist(ytm.get_history())
+
+	# EXAMPLE - download the most recent 1000 songs you liked
+
+	ytm.download_playlist(ytm.get_liked_songs(limit=1000))
+
+
+
+''' If you were a Google Play Music fan like I was, then I feel your pain. 
+As though finding out that Play Music was being shut down weren't bad enough, 
+being given the option of a Takeout archive containing only uploaded tracks,
+not any purchased content (even though it was downloadable through Play Music),
+or transferring your music library over to Youtube Music (which admittedly does
+bear a passing resemblance, at least cosmetically, to Play Music now) where you 
+can still stream it all you want... I wanted those purchased songs I could 
+buy and then download to make a mixtape in <your chosen DAW> or just listen to
+now that I am too broke to be able to afford an internet connection. '''