# Script to fetch and print the transcript for a YouTube video using the
# YouTube Data API v3.
#
# 1. Create a project in the Google API Console
# 2. Enable the YouTube Data API v3 for the new project
# 3. Create credentials for a "Desktop" OAuth client. Download the JSON file
#    containing the credentials at the end of the setup process.
# 4. Create a new virtualenv and install dependencies with:
#
#    ```
#    pip install google-auth-oauthlib requests
#    ```
# 5. Edit the `video_id` variable below to reference a video that you are
#    the owner of. If you specify a video owned by someone else, the request
#    to download captions will return a 403.
# 6. Run the script and authenticate in a browser when prompted. After
#    authentication, the transcript should be fetched and printed.

import argparse
import json
import os

from google.auth.transport.requests import Request as AuthRequest
from google.oauth2.credentials import Credentials
import google_auth_oauthlib
import requests

# Credentials for a "Desktop" OAuth client, downloaded from the Google API Console.
credentials_file = "youtube-client.json"

# ID of a YouTube video to fetch captions for.
video_id = "-MEhsla5YZc"

# Path where OAuth 2 credentials are persisted.
saved_credentials = "saved_oauth_credentials.json"

scopes = ["https://www.googleapis.com/auth/youtube.force-ssl"]

# Reuse previously saved credentials when possible so the browser prompt is
# only needed on the first run.
credentials = None
credentials_changed = False

if os.path.exists(saved_credentials):
    credentials = Credentials.from_authorized_user_file(
        saved_credentials, scopes=scopes
    )
    if not credentials.valid:
        if credentials.refresh_token:
            # `credentials.apply()` below only copies the stored token into
            # the headers; it never refreshes it. Refresh explicitly so an
            # expired saved token does not produce a 401 from the API.
            credentials.refresh(AuthRequest())
            credentials_changed = True
        else:
            # Nothing to refresh with: fall back to the interactive flow.
            credentials = None

if credentials is None:
    with open(credentials_file, "r", encoding="utf-8") as fp:
        creds = json.load(fp)
    client_id = creds["installed"]["client_id"]
    client_secret = creds["installed"]["client_secret"]

    credentials = google_auth_oauthlib.get_user_credentials(
        scopes, client_id, client_secret
    )
    credentials_changed = True

if credentials_changed:
    # Persist new or refreshed credentials for the next run.
    with open(saved_credentials, "w", encoding="utf-8") as fp:
        fp.write(credentials.to_json())

# Authorization headers sent with every API request below.
auth_headers = {}
credentials.apply(auth_headers)

# Get ID of first available caption track for the video.
# See https://developers.google.com/youtube/v3/docs/captions/list.
# Seconds to wait on each HTTP request; without a timeout `requests` can
# block forever on a stalled connection.
request_timeout = 30

captions_list_rsp = requests.get(
    "https://www.googleapis.com/youtube/v3/captions",
    params={"part": "id", "videoId": video_id},
    headers=auth_headers,
    timeout=request_timeout,
)
captions_list_rsp.raise_for_status()
captions_list_json = captions_list_rsp.json()

# Exit with a readable message instead of an IndexError/KeyError when the
# response contains no caption tracks.
caption_tracks = captions_list_json.get("items") or []
if not caption_tracks:
    raise SystemExit(f"No caption tracks found for video {video_id!r}")
captions_id = caption_tracks[0]["id"]

# Attempt to download the captions.
# See https://developers.google.com/youtube/v3/docs/captions/download.
captions_rsp = requests.get(
    f"https://www.googleapis.com/youtube/v3/captions/{captions_id}",
    headers=auth_headers,
    timeout=request_timeout,
)
captions_rsp.raise_for_status()
transcript = captions_rsp.text
print(transcript)