Skip to content

Instantly share code, notes, and snippets.

@robertknight
Last active August 1, 2024 12:11
Show Gist options
  • Save robertknight/b1216e87d4831fccaf7a03a01421d167 to your computer and use it in GitHub Desktop.
Save robertknight/b1216e87d4831fccaf7a03a01421d167 to your computer and use it in GitHub Desktop.

Revisions

  1. robertknight revised this gist Aug 1, 2024. 1 changed file with 5 additions and 1 deletion.
    6 changes: 5 additions & 1 deletion youtube_captions_api_test.py
    Original file line number Diff line number Diff line change
    @@ -10,6 +10,11 @@
    # ```
    # pip install google-auth-oauthlib requests
    # ```
    # 5. Edit the `video_id` variable below to reference a video that you are
    # the owner of. If you specify a video owned by someone else, the request
    # to download captions will return a 403.
    # 6. Run the script and authenticate in a browser when prompted. After
    # authentication, the transcript should be fetched and printed.

    import argparse
    import json
    @@ -23,7 +28,6 @@
    credentials_file = "youtube-client.json"
    # ID of a YouTube video to fetch captions for.
    video_id = "-MEhsla5YZc"
    video_id = "D-yvZ8CARSg"
    # Path where OAuth 2 credentials are persisted.
    saved_credentials = "saved_oauth_credentials.json"

  2. robertknight revised this gist Aug 1, 2024. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions youtube_captions_api_test.py
    Original file line number Diff line number Diff line change
    @@ -23,6 +23,7 @@
    credentials_file = "youtube-client.json"
    # ID of a YouTube video to fetch captions for.
    video_id = "-MEhsla5YZc"
    video_id = "D-yvZ8CARSg"
    # Path where OAuth 2 credentials are persisted.
    saved_credentials = "saved_oauth_credentials.json"

    @@ -47,6 +48,7 @@
    credentials.apply(auth_headers)

    # Get ID of first available caption track for the video.
    # See https://developers.google.com/youtube/v3/docs/captions/list.
    captions_list_rsp = requests.get(
    "https://www.googleapis.com/youtube/v3/captions",
    params={"part": "id", "videoId": video_id},
    @@ -57,6 +59,7 @@
    captions_id = captions_list_json["items"][0]["id"]

    # Attempt to download the captions.
    # See https://developers.google.com/youtube/v3/docs/captions/download.
    captions_rsp = requests.get(
    f"https://www.googleapis.com/youtube/v3/captions/{captions_id}",
    headers=auth_headers,
  3. robertknight created this gist Aug 1, 2024.
    66 changes: 66 additions & 0 deletions youtube_captions_api_test.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,66 @@
    # Script to fetch and print the transcript for a YouTube video using the
    # YouTube Data API v3.
    #
    # 1. Create a project in the Google API Console
    # 2. Enable the YouTube Data API v3 for the new project
    # 3. Create credentials for a "Desktop" OAuth client. Download the JSON file
    # containing the credentials at the end of the setup process.
    # 4. Create a new virtualenv and install dependencies with:
    #
    # ```
    # pip install google-auth-oauthlib requests
    # ```

    import argparse
    import json
    import os

    from google.oauth2.credentials import Credentials
    import google_auth_oauthlib
    import requests

    # Credentials for a "Desktop" OAuth client, downloaded from the Google API Console.
    # Only read on the first run, to obtain the client ID and client secret.
    credentials_file = "youtube-client.json"
    # ID of a YouTube video to fetch captions for. The script downloads the
    # first caption track listed for this video.
    video_id = "-MEhsla5YZc"
    # Path where OAuth 2 credentials are persisted. Written after the first
    # successful authorization and reloaded on later runs instead of
    # prompting again.
    saved_credentials = "saved_oauth_credentials.json"

    # OAuth 2 scopes requested during authorization and passed again when the
    # saved credentials are reloaded.
    scopes = ["https://www.googleapis.com/auth/youtube.force-ssl"]

    # Obtain OAuth 2 user credentials. On the first run there is no saved
    # credentials file, so run the interactive authorization flow using the
    # client ID and secret from `credentials_file` and persist the result.
    # On later runs, reload the persisted credentials instead.
    if not os.path.exists(saved_credentials):
        with open(credentials_file, "r", encoding="utf-8") as fp:
            creds = json.load(fp)
        client_id = creds["installed"]["client_id"]
        client_secret = creds["installed"]["client_secret"]
        credentials = google_auth_oauthlib.get_user_credentials(
            scopes, client_id, client_secret
        )
        with open(saved_credentials, "w", encoding="utf-8") as fp:
            fp.write(credentials.to_json())
    else:
        credentials = Credentials.from_authorized_user_file(
            saved_credentials, scopes=scopes
        )

    # `apply` below only copies the current access token into the headers; it
    # never refreshes it. A token reloaded from disk may have expired since
    # the previous run, so refresh it first and persist the new token.
    if not credentials.valid:
        # Imported here because it is only needed on the refresh path; it is
        # part of the google-auth package that already provides `Credentials`.
        from google.auth.transport.requests import Request as AuthRequest

        credentials.refresh(AuthRequest())
        with open(saved_credentials, "w", encoding="utf-8") as fp:
            fp.write(credentials.to_json())

    # Build the `Authorization` header used by the API requests below.
    auth_headers = {}
    credentials.apply(auth_headers)

    # Get ID of first available caption track for the video.
    # See https://developers.google.com/youtube/v3/docs/captions/list.
    captions_list_rsp = requests.get(
        "https://www.googleapis.com/youtube/v3/captions",
        params={"part": "id", "videoId": video_id},
        headers=auth_headers,
        # requests has no default timeout; without one a stalled connection
        # would hang the script indefinitely.
        timeout=30,
    )
    captions_list_rsp.raise_for_status()
    captions_list_json = captions_list_rsp.json()

    # A video with no caption tracks yields no items. Exit with a clear
    # message rather than an opaque IndexError/KeyError.
    caption_tracks = captions_list_json.get("items") or []
    if not caption_tracks:
        raise SystemExit(f"No caption tracks found for video {video_id!r}")
    captions_id = caption_tracks[0]["id"]

    # Attempt to download the captions and print them to stdout.
    # See https://developers.google.com/youtube/v3/docs/captions/download.
    captions_rsp = requests.get(
        f"https://www.googleapis.com/youtube/v3/captions/{captions_id}",
        headers=auth_headers,
        # requests has no default timeout; without one a stalled connection
        # would hang the script indefinitely.
        timeout=30,
    )
    # NOTE: if the authenticated user does not own the video, this request
    # returns a 403, which is raised here as an HTTPError.
    captions_rsp.raise_for_status()
    transcript = captions_rsp.text
    print(transcript)