@t-eckert
Created January 4, 2022 04:28

README.md

# Archivist

My buddy Mikhail wanted to download all of his GitHub repositories for safekeeping. I wrote this script for him to do that.

## How to use it

Copy the `archivist.py` file to your computer.

Create a virtual environment and activate it.

``` bash
python3 -m venv .venv
source .venv/bin/activate
```

Install the dependencies. It's just one.

``` bash
pip install httpx
```

[Create an OAuth access token for GitHub](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token). If you want private repositories included, give the token access to your repos (the `repo` scope for a classic token).
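
Before running anything, you can sanity-check that the token works. This little snippet is my addition (it is not part of `archivist.py`); it should print your GitHub login if the token is valid:

``` python
# Quick token check (hypothetical helper, not part of archivist.py).
import httpx

token = "<GITHUB-ACCESS-TOKEN>"  # paste your token here

response = httpx.get(
    "https://api.github.com/user",
    headers={"Authorization": f"token {token}"},
)
print(response.status_code, response.json().get("login"))
```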

Change directory to where you want to put your archives. Call the script from there, passing in the access token.

``` bash
python3 archivist.py <GITHUB-ACCESS-TOKEN>
```
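
Each repository ends up in the current directory as `<repo-name>.tar.gz`. If you want to unpack them afterwards, a sketch like the following works; it is my addition (not part of `archivist.py`) and uses only the standard-library `tarfile` module:

``` python
# Hypothetical follow-up step: unpack every downloaded archive into its own folder.
import pathlib
import tarfile

for archive in pathlib.Path(".").glob("*.tar.gz"):
    target = archive.name.removesuffix(".tar.gz")  # e.g. my-repo.tar.gz -> my-repo/
    with tarfile.open(archive, "r:gz") as tar:
        tar.extractall(target)
```

The `removesuffix` call needs Python 3.9 or newer, which the script's `list[tuple[str, str]]` type hint already assumes.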

archivist.py

    """Gist to help Mikhail download all the files from a GitHub archive."""

    import argparse
    import httpx

    parser = argparse.ArgumentParser(
    description="Download all files from a GitHub archive."
    )
    parser.add_argument("token", type=str, help="GitHub access token")

    args = parser.parse_args()
    gh_token = args.token


def collect_urls(api: str) -> list[tuple[str, str]]:
    # Call the API and continue calling until all pages are exhausted.

    response = httpx.get(
        api,
        headers={"Authorization": f"token {gh_token}"},
    )

    if not response.is_success:
        print(response.json())
        exit(1)

    archive_urls = [
        (repo["name"], repo["archive_url"].replace("{archive_format}{/ref}", "tarball"))
        for repo in response.json()
    ]

    if response.links.get("next"):
        archive_urls.extend(collect_urls(response.links["next"]["url"]))

    return archive_urls


    print("Collecting URLs...")
    archive_urls = collect_urls("https://api.github.com/user/repos")
    print(f"Let's download {len(archive_urls)} repos!")

# Stream each archive to disk as <repo-name>.tar.gz in the current directory.
for url in archive_urls:
    print("Downloading", url[0])
    with httpx.stream("GET", url[1], follow_redirects=True) as r:
        with open(f"{url[0]}.tar.gz", "wb") as f:
            for b in r.iter_bytes():
                f.write(b)
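
A note on the pagination: the GitHub API returns repositories a page at a time (30 by default), so `collect_urls` follows the `Link` header's `next` URL, which `httpx` exposes pre-parsed as `response.links`, until no pages remain. Roughly (illustrative values, not output from a real account), a page that has a successor carries something like:

``` python
# Illustrative shape of httpx's parsed Link header; the URLs are made up.
# response.links == {
#     "next": {"url": "https://api.github.com/user/repos?page=2", "rel": "next"},
#     "last": {"url": "https://api.github.com/user/repos?page=3", "rel": "last"},
# }
```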