Skip to content

Instantly share code, notes, and snippets.

@darmiel
Last active June 28, 2022 23:45
Show Gist options
  • Select an option

  • Save darmiel/8d3d93dc72d19f49ffc7f386db73ed89 to your computer and use it in GitHub Desktop.

Select an option

Save darmiel/8d3d93dc72d19f49ffc7f386db73ed89 to your computer and use it in GitHub Desktop.
Crawl Twitch chat from a past broadcast
import requests
import json
# --- user configuration: fill in both values before running ---
# ID of the past broadcast whose comments are fetched.
BROADCAST_ID: str = ""
# Client ID sent in the "client-id" header of every request.
CLIENT_ID: str = ""

# --- derived request settings (no need to edit) ---
# Comments endpoint for the configured broadcast; both URLs below extend it.
_COMMENTS_URL: str = f"https://api.twitch.tv/v5/videos/{BROADCAST_ID}/comments"
# URL of the first page: comments starting at offset 0 seconds.
START_URL: str = _COMMENTS_URL + "?content_offset_seconds=0"
# URL prefix for follow-up pages; the pagination cursor is appended to it.
CURSOR_URL: str = _COMMENTS_URL + "?cursor="
# HTTP headers attached to every request.
HEADERS = {"client-id": CLIENT_ID}
# Avoid "maximum recursion depth exceeded while calling a Python object":
# load_comments() calls itself once per page of results, so a long broadcast
# produces a very deep call chain. Both the OS stack limit and the
# interpreter's recursion limit are raised before any crawling starts.
# https://stackoverflow.com/questions/5061582/setting-stacksize-in-a-python-script/16248113#16248113
import sys
import resource
# Request a soft stack limit of 2**29 bytes (512 MiB); the hard limit is
# passed as -1 (presumably "unlimited", i.e. RLIM_INFINITY -- confirm for the
# target platform).
# NOTE(review): the `resource` module is Unix-only -- confirm this script is
# never run on Windows.
resource.setrlimit(resource.RLIMIT_STACK, (2**29,-1))
# Allow up to one million nested Python calls.
sys.setrecursionlimit(10**6)
# ---
def load_comments(token: str, all_comments: list) -> list:
    """Fetch every page of comments, starting at *token*, into *all_comments*.

    Pages are requested one after another by following the ``_next`` cursor
    of each response. The loop is iterative, so the number of pages is not
    bounded by the interpreter's recursion limit.

    Args:
        token: Pagination cursor to resume from. An empty string starts at
            the beginning of the broadcast (``START_URL``).
        all_comments: List that receives the downloaded comments; it is
            extended in place.

    Returns:
        The same ``all_comments`` list, for convenience.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status.
        KeyError: If a successful response has no ``comments`` field.
    """
    while True:
        url = CURSOR_URL + token if token else START_URL

        resp = requests.get(url, headers=HEADERS)
        # Fail with a clear HTTP error instead of an opaque KeyError further
        # down when the API rejects the request (bad client id, unknown
        # broadcast, rate limit, ...).
        resp.raise_for_status()
        jr = json.loads(resp.text)

        # A page without a `_next` field (or with an empty/null one) is
        # treated as the last page rather than crashing on the lookup.
        next_token = jr.get('_next') or ""
        comments = jr['comments']

        # debug
        print("loaded", len(comments), "comments (", len(all_comments), "total )")
        print(" >> next token:", next_token)

        all_comments.extend(comments)

        if not next_token:
            return all_comments
        token = next_token
def main():
    """Crawl all comments of the configured broadcast and save them as JSON.

    The comments are collected via ``load_comments`` starting from an empty
    cursor, serialised to a JSON string, and written to ``resp.json`` in the
    current working directory (the file is created or truncated).
    """
    collected = []
    load_comments("", collected)

    print("loaded a total of", len(collected), "comments. saving to file...")

    # Serialise before opening the file so a serialisation failure cannot
    # leave behind a truncated resp.json.
    payload = json.dumps(collected)
    with open("resp.json", "w+") as out_file:
        out_file.write(payload)

    print(" >> done!")
# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment