    #!/bin/bash
# Benchmark script to measure the performance impact of logprobs with different top_logprobs settings
    # Usage: ./scripts/benchmark_logprobs.sh [model_name]
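#
# Requires curl, jq, and an Ollama server listening on localhost:11434.
# Note: the "logprobs"/"top_logprobs" request fields assume an Ollama build
# with logprobs support (e.g. the feature branch under test); a build without
# it may silently ignore them, in which case every configuration measures
# the same baseline.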

    set -e
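
# Fail fast if a required tool is missing (a small convenience guard,
# not part of the benchmark logic itself).
for dep in curl jq; do
    command -v "$dep" >/dev/null 2>&1 || { echo "Missing dependency: $dep" >&2; exit 1; }
done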

    MODEL="${1:-gemma3:1b}"
    PROMPT="Write a short story about a robot."
    NUM_PREDICT=100
    RUNS=5
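# Each configuration is averaged over RUNS runs; temperature 0 and a fixed
# seed (set per-request below) keep generations deterministic, so timing
# differences between configurations should reflect logprobs overhead rather
# than output variance.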

    echo "=== Logprobs Performance Benchmark ==="
    echo "Model: $MODEL"
    echo "Prompt: $PROMPT"
    echo "Tokens to generate: $NUM_PREDICT"
    echo "Runs per configuration: $RUNS"
    echo ""

    # Function to run benchmark
benchmark() {
    local logprobs=$1
    local top_logprobs=$2
    local label=$3

    echo "Testing: $label"

    local total_time=0
    local total_tokens=0

    for i in $(seq 1 $RUNS); do
        result=$(curl -s http://localhost:11434/api/generate -d "{
            \"model\": \"$MODEL\",
            \"prompt\": \"$PROMPT\",
            \"stream\": false,
            \"options\": {
                \"num_predict\": $NUM_PREDICT,
                \"temperature\": 0,
                \"seed\": 42
            },
            \"logprobs\": $logprobs,
            \"top_logprobs\": $top_logprobs
        }")

        # Extract timing info
        eval_duration=$(echo "$result" | jq -r '.eval_duration // 0')
        eval_count=$(echo "$result" | jq -r '.eval_count // 0')

        if [ "$eval_duration" != "0" ] && [ "$eval_count" != "0" ]; then
            total_time=$((total_time + eval_duration))
            total_tokens=$((total_tokens + eval_count))
        fi

        # Small delay between runs
        sleep 0.5
    done

    # Calculate averages (duration is in nanoseconds)
    avg_time_ms=$((total_time / RUNS / 1000000))
    avg_tokens=$((total_tokens / RUNS))

    if [ "$avg_tokens" -gt 0 ]; then
        tokens_per_sec=$((avg_tokens * 1000000000 / (total_time / RUNS)))
        ms_per_token=$((avg_time_ms / avg_tokens))

        printf "  Average time: %d ms\n" "$avg_time_ms"
        printf "  Tokens: %d\n" "$avg_tokens"
        printf "  Tokens/sec: %d\n" "$tokens_per_sec"
        printf "  ms/token: %d\n" "$ms_per_token"
    else
        echo "  Failed to get valid results"
    fi

    echo ""
}

    # Check if model exists
if ! curl -s http://localhost:11434/api/tags | jq -e ".models[] | select(.name == \"$MODEL\")" > /dev/null 2>&1; then
    echo "Model $MODEL not found. Pulling..."
    ollama pull "$MODEL"
fi

    echo "Warming up model..."
    curl -s http://localhost:11434/api/generate -d "{
    \"model\": \"$MODEL\",
    \"prompt\": \"test\",
    \"stream\": false,
    \"options\": {\"num_predict\": 1}
    }" > /dev/null

    echo "Starting benchmarks..."
    echo ""

    # Baseline: No logprobs
    benchmark false 0 "Baseline (no logprobs)"

# With logprobs, no top_logprobs
benchmark true 0 "Logprobs only (top_logprobs=0)"

# With logprobs and different top_logprobs values
benchmark true 1 "Logprobs + top_logprobs=1"
benchmark true 5 "Logprobs + top_logprobs=5"
benchmark true 10 "Logprobs + top_logprobs=10"
benchmark true 20 "Logprobs + top_logprobs=20"

    echo "=== Benchmark Complete ==="