Revisions

  1. @manics manics revised this gist Jul 13, 2025. 1 changed file with 175 additions and 6 deletions.
    181 changes: 175 additions & 6 deletions k8tre-demo-audiotrack.ipynb
    @@ -22,12 +22,12 @@
    "outputs": [],
    "source": [
    "lines = [\n",
    " \"CATER is designed using infrastructure-as-code, and is deployed using a git-ops workflow. We're using ArgoCD which is an open-source git-ops tools designed for Kubernetes. This means all infrastructure is fully reproducible, and all approved code updates are automatically deployed by ArgoCD.\",\n",
    " \"CATER is designed using infrastructure-as-code, and is deployed using a git-ops workflow. We\"re using ArgoCD which is an open-source git-ops tools designed for Kubernetes. This means all infrastructure is fully reproducible, and all approved code updates are automatically deployed by ArgoCD.\",\n",
    " \"CATER consists of a set of applications (or components). By default we install everything required to run a T.R.E., but all components can be disabled or replaced by another implementation.\",\n",
    " \"For the first three months work has focussed on this backend work of writing and deploying components, but we've now started integrating those components so they can be used through a frontend.\",\n",
    " \"For the first three months work has focussed on this backend work of writing and deploying components, but we\"ve now started integrating those components so they can be used through a frontend.\",\n",
    "\n",
    " \"You login to CATER using Keycloak. A username and password are used in this demo, but multi-factor authentication can be easily added, and Keycloak can federate with other identity providers using SAML or O.I.C.D.\",\n",
    " \"In this demo we're using JupyterHub as a control plane for researcher workspaces.\",\n",
    " \"In this demo we\"re using JupyterHub as a control plane for researcher workspaces.\",\n",
    " \"You can see a list of projects and workspace types. This demo only has Ubuntu Mate desktops.\",\n",
    " \"When you launch a workspace a new Kubernetes pod is created, project storage is mounted, and users are given access via Apache Guacamole which is an open-source remote desktop gateway.\",\n",
    " \"As you can see you have a full desktop via a web browser\",\n",
    @@ -74,7 +74,7 @@
    " s = r[\"AudioStream\"]\n",
    "\n",
    " with wave.open(outfile, \"wb\") as wav:\n",
    " wav.setparams((1, 2, 16000, 0, 'NONE', 'NONE'))\n",
    " wav.setparams((1, 2, 16000, 0, \"NONE\", \"NONE\"))\n",
    " wav.writeframes(s.read())\n"
    ]
    },
    @@ -90,18 +90,187 @@
    " speak(text, f\"{i:02d}.wav\")"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "5784e676-37aa-4e92-99bb-c34727329b32",
    "metadata": {},
    "outputs": [],
    "source": []
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "07110973",
    "metadata": {},
    "outputs": [],
    "source": [
    "import ffmpeg\n",
    "import os\n"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "9afc0e1e-34d8-4928-bba8-4ceb89f5f57d",
    "metadata": {},
    "outputs": [],
    "source": [
    "# Define input file paths\n",
    "audio_files = [\"00.wav\", \"01.wav\", \"02.wav\", \"03.wav\", \"04.wav\", \"05.wav\", \"06.wav\", \"07.wav\"]\n",
    "output_file = \"concatenated_with_gaps.wav\"\n",
    "# audio_codec = \"libmp3lame\" # Or \"aac\", \"pcm_s16le\", etc. based on desired output\n",
    "audio_codec = \"pcm_s16le\"\n",
    "\n",
    "# Define the gap duration in seconds\n",
    "START_DURATION = 2.0\n",
    "GAP_DURATION = 2.0\n",
    "END_DURATION = 2.0"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "752cf3cc-645a-4777-a535-89b2b6d761c4",
    "metadata": {},
    "outputs": [],
    "source": [
    "# --- Step 1: Get audio file properties for silence generation ---\n",
    "# We need to match the sample rate and channel layout of the real audio files\n",
    "# to ensure consistent silence. We\"ll just probe the first audio file.\n",
    "def get_audio_properties(file_path):\n",
    " \"\"\"\n",
    " Gets sample rate and channel layout of an audio file using ffprobe.\n",
    " \"\"\"\n",
    " try:\n",
    " probe = ffmpeg.probe(file_path)\n",
    " audio_stream = next((s for s in probe[\"streams\"] if s[\"codec_type\"] == \"audio\"), None)\n",
    " if audio_stream:\n",
    " return {\n",
    " \"sample_rate\": int(audio_stream[\"sample_rate\"]),\n",
    " \"channel_layout\": audio_stream.get(\"channel_layout\", \"mono\"), # Default to mono if not found\n",
    " \"duration\": float(audio_stream[\"duration\"]),\n",
    " }\n",
    " else:\n",
    " raise ValueError(f\"No audio stream found for: {file_path}\")\n",
    " except ffmpeg.Error as e:\n",
    " print(f\"Error probing {file_path}: {e.stderr.decode()}\")\n",
    " raise\n",
    " except Exception as e:\n",
    " print(f\"An unexpected error occurred while probing {file_path}: {e}\")\n",
    " raise"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "0149e5cd-a562-458a-a2d7-4b9ff4a24144",
    "metadata": {},
    "outputs": [],
    "source": [
    "# Properties for all audio files must match\n",
    "audio_props = [get_audio_properties(f) for f in audio_files]\n",
    "sample_rates = set(p[\"sample_rate\"] for p in audio_props)\n",
    "if len(sample_rates) != 1:\n",
    " print(f\"Multiple sample rates found: {sample_rates}\")\n",
    "channel_layouts = set(p[\"channel_layout\"] for p in audio_props)\n",
    "if len(channel_layouts) != 1:\n",
    " print(f\"Multiple channel layouts found: {channel_layouts}\")\n",
    "\n",
    "first_audio_props = audio_props[0]\n",
    "SAMPLE_RATE = next(iter(sample_rates))\n",
    "CHANNEL_LAYOUT = next(iter(channel_layouts))\n",
    "\n",
    "print(f\"Using sample rate: {SAMPLE_RATE} Hz, channel layout: {CHANNEL_LAYOUT} for silence.\")"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "ff6db6c4-e646-4e3b-8bcc-adf1699431f1",
    "metadata": {},
    "outputs": [],
    "source": [
    "# --- Step 2: Build the filtergraph including silence ---\n",
    "graph_inputs = []\n",
    "\n",
    "# Insert gap at start\n",
    "silence_input = ffmpeg.input(\n",
    " f\"anullsrc=r={SAMPLE_RATE}:cl={CHANNEL_LAYOUT}\",\n",
    " f=\"lavfi\",\n",
    " t=START_DURATION,\n",
    ").audio\n",
    "graph_inputs.append(silence_input)\n",
    "\n",
    "for i, audio_file in enumerate(audio_files):\n",
    " graph_inputs.append(ffmpeg.input(audio_file).audio)\n",
    "\n",
    " # Add silence between tracks\n",
    " if i < len(audio_files) - 1:\n",
    " silence_input = ffmpeg.input(\n",
    " f\"anullsrc=r={SAMPLE_RATE}:cl={CHANNEL_LAYOUT}\",\n",
    " f=\"lavfi\", # \"lavfi\" is for libavfilter inputs like anullsrc\n",
    " t=GAP_DURATION,\n",
    " ).audio\n",
    " graph_inputs.append(silence_input)\n",
    "\n",
    "# Insert gap at end\n",
    "silence_input = ffmpeg.input(\n",
    " f\"anullsrc=r={SAMPLE_RATE}:cl={CHANNEL_LAYOUT}\",\n",
    " f=\"lavfi\",\n",
    " t=END_DURATION,\n",
    ").audio\n",
    "graph_inputs.append(silence_input)\n",
    "\n",
    "\n",
    "# Need to tell the concat filter how many inputs it has in total.\n",
    "total_inputs_for_concat = len(graph_inputs)\n",
    "\n",
    "# Apply the concat filter\n",
    "concatenated_audio = ffmpeg.filter(\n",
    " graph_inputs,\n",
    " \"concat\",\n",
    " n=total_inputs_for_concat,\n",
    " v=0, # No video streams\n",
    " a=1 # One audio stream per input\n",
    ")"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "360882f9-5fba-476f-a741-539e3df749cb",
    "metadata": {},
    "outputs": [],
    "source": [
    "# --- Step 3: Define the output and run ---\n",
    "output_stream = ffmpeg.output(\n",
    " concatenated_audio,\n",
    " output_file,\n",
    " acodec=audio_codec,\n",
    ")\n",
    "\n",
    "try:\n",
    " ffmpeg.run(output_stream, overwrite_output=True)\n",
    " print(f\"Audio files concatenated to {output_file} with {GAP_DURATION}-second gaps successfully!\")\n",
    "\n",
    "except ffmpeg.Error as e:\n",
    " print(e)\n",
    " raise"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "2e8f960a-afa5-49c0-b423-c9eabb6d1b51",
    "metadata": {},
    "outputs": [],
    "source": []
    }
    ],
    "metadata": {
    "kernelspec": {
    "display_name": "cloud",
    "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
    },
    @@ -115,7 +284,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.13.3"
    "version": "3.13.5"
    }
    },
    "nbformat": 4,
  2. @manics manics created this gist Jul 10, 2025.
    123 changes: 123 additions & 0 deletions k8tre-demo-audiotrack.ipynb
    @@ -0,0 +1,123 @@
    {
    "cells": [
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "13e39ceb",
    "metadata": {},
    "outputs": [],
    "source": [
    "import boto3\n",
    "import wave\n",
    "\n",
    "# boto3.setup_default_session(profile_name=\"...\")\n",
    "polly = boto3.client(\"polly\")"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "a670d769",
    "metadata": {},
    "outputs": [],
    "source": [
    "lines = [\n",
    " \"CATER is designed using infrastructure-as-code, and is deployed using a git-ops workflow. We're using ArgoCD which is an open-source git-ops tools designed for Kubernetes. This means all infrastructure is fully reproducible, and all approved code updates are automatically deployed by ArgoCD.\",\n",
    " \"CATER consists of a set of applications (or components). By default we install everything required to run a T.R.E., but all components can be disabled or replaced by another implementation.\",\n",
    " \"For the first three months work has focussed on this backend work of writing and deploying components, but we've now started integrating those components so they can be used through a frontend.\",\n",
    "\n",
    " \"You login to CATER using Keycloak. A username and password are used in this demo, but multi-factor authentication can be easily added, and Keycloak can federate with other identity providers using SAML or O.I.C.D.\",\n",
    " \"In this demo we're using JupyterHub as a control plane for researcher workspaces.\",\n",
    " \"You can see a list of projects and workspace types. This demo only has Ubuntu Mate desktops.\",\n",
    " \"When you launch a workspace a new Kubernetes pod is created, project storage is mounted, and users are given access via Apache Guacamole which is an open-source remote desktop gateway.\",\n",
    " \"As you can see you have a full desktop via a web browser\",\n",
    "]"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "bf70124e",
    "metadata": {},
    "outputs": [],
    "source": [
    "voices = polly.describe_voices()[\"Voices\"]"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "47c5c74e",
    "metadata": {},
    "outputs": [],
    "source": [
    "# voice = \"Brian\"\n",
    "voice = \"Amy\"\n",
    "# voice = \"Emma\""
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "6d166646",
    "metadata": {},
    "outputs": [],
    "source": [
    "def speak(text, outfile):\n",
    " r = polly.synthesize_speech(\n",
    " Engine=\"neural\",\n",
    " LanguageCode=\"en-GB\",\n",
    " OutputFormat=\"pcm\",\n",
    " Text=text,\n",
    " VoiceId=voice,\n",
    " )\n",
    " s = r[\"AudioStream\"]\n",
    "\n",
    " with wave.open(outfile, \"wb\") as wav:\n",
    " wav.setparams((1, 2, 16000, 0, 'NONE', 'NONE'))\n",
    " wav.writeframes(s.read())\n"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "f62929a0",
    "metadata": {},
    "outputs": [],
    "source": [
    "for i, text in enumerate(lines):\n",
    " print(text)\n",
    " speak(text, f\"{i:02d}.wav\")"
    ]
    },
    {
    "cell_type": "code",
    "execution_count": null,
    "id": "07110973",
    "metadata": {},
    "outputs": [],
    "source": []
    }
    ],
    "metadata": {
    "kernelspec": {
    "display_name": "cloud",
    "language": "python",
    "name": "python3"
    },
    "language_info": {
    "codemirror_mode": {
    "name": "ipython",
    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
    "version": "3.13.3"
    }
    },
    "nbformat": 4,
    "nbformat_minor": 5
    }
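
For context, two reduced sketches of what the notebook above does. Neither is part of the gist; any file name other than the generated clips (00.wav, 01.wav, ...) is an assumption made for illustration.

The original Jul 10 notebook requests raw PCM from Amazon Polly and wraps it in a WAV container with wave.setparams((1, 2, 16000, 0, "NONE", "NONE")); those parameters (mono, 16-bit samples, 16 kHz) match what Polly's "pcm" OutputFormat returns at its default sample rate. A quick check that a generated clip carries that header:

    import wave

    # Sketch: inspect one of the clips written by speak() above.
    # Polly's "pcm" output is signed 16-bit, mono, 16 kHz by default,
    # which is what setparams((1, 2, 16000, ...)) declares.
    with wave.open("00.wav", "rb") as wav:
        print(wav.getnchannels(), wav.getsampwidth(), wav.getframerate())
        # expected: 1 2 16000

The Jul 13 revision joins the clips with ffmpeg-python: lavfi anullsrc inputs generate the silent gaps, and the concat filter stitches clips and gaps into a single stream. A cut-down version of that graph (two clips, one 2-second gap, hypothetical output name joined.wav) that prints the underlying ffmpeg command instead of running it:

    import ffmpeg

    SAMPLE_RATE = 16000       # must match the Polly clips
    CHANNEL_LAYOUT = "mono"
    GAP_DURATION = 2.0        # seconds of silence between clips

    # A lavfi anullsrc input produces the silence, as in the notebook.
    silence = ffmpeg.input(
        f"anullsrc=r={SAMPLE_RATE}:cl={CHANNEL_LAYOUT}", f="lavfi", t=GAP_DURATION
    ).audio
    parts = [ffmpeg.input("00.wav").audio, silence, ffmpeg.input("01.wav").audio]
    joined = ffmpeg.filter(parts, "concat", n=len(parts), v=0, a=1)
    out = ffmpeg.output(joined, "joined.wav", acodec="pcm_s16le")

    # ffmpeg-python's compile() returns the argv list without executing it.
    print(" ".join(ffmpeg.compile(out, overwrite_output=True)))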