Skip to content

Instantly share code, notes, and snippets.

@baptistejamin
Created November 3, 2025 09:44
Show Gist options
  • Select an option

  • Save baptistejamin/6a458ea9b62a37caa5e59eb49970bb45 to your computer and use it in GitHub Desktop.

Select an option

Save baptistejamin/6a458ea9b62a37caa5e59eb49970bb45 to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# Logprobs benchmark: compares generation speed with logprobs disabled,
# enabled, and enabled with several top_logprobs (topK) settings.
#
# Usage: ./scripts/benchmark_logprobs.sh [model_name]

# Abort on the first unhandled command failure.
set -o errexit

# --- Benchmark configuration -------------------------------------------------
# Model under test; the first CLI argument overrides the default.
MODEL=${1:-gemma3:1b}
# Prompt sent on every timed request.
PROMPT='Write a short story about a robot.'
# Number of tokens requested per generation.
NUM_PREDICT=100
# Number of timed requests per configuration.
RUNS=5

# --- Banner ------------------------------------------------------------------
# One line per argument; the trailing empty argument yields the blank line.
printf '%s\n' \
  "=== Logprobs Performance Benchmark ===" \
  "Model: $MODEL" \
  "Prompt: $PROMPT" \
  "Tokens to generate: $NUM_PREDICT" \
  "Runs per configuration: $RUNS" \
  ""
#######################################
# Benchmark one logprobs configuration against the local Ollama server.
#
# Sends RUNS non-streaming requests to /api/generate (temperature 0, seed 42),
# reads eval_duration (nanoseconds) and eval_count from each response, and
# prints averages over the runs that returned usable timings. Runs whose
# request failed or whose response lacked positive integer timings are
# discarded and do not dilute the averages.
#
# Globals:
#   MODEL, PROMPT, NUM_PREDICT, RUNS (read)
# Arguments:
#   $1 - JSON value for the request's "logprobs" field (true or false)
#   $2 - JSON value for the request's "top_logprobs" field (integer)
#   $3 - label printed before the results
# Outputs:
#   Results on stdout; a warning on stderr when some runs were discarded.
# Returns:
#   0, also when no run produced valid results (a message is printed instead).
#######################################
benchmark() {
  local logprobs=$1
  local top_logprobs=$2
  local label=$3
  local total_time=0
  local total_tokens=0
  local ok_runs=0
  local i result eval_duration eval_count

  # The request body does not change between runs, so build it once.
  local payload="{
    \"model\": \"$MODEL\",
    \"prompt\": \"$PROMPT\",
    \"stream\": false,
    \"options\": {
      \"num_predict\": $NUM_PREDICT,
      \"temperature\": 0,
      \"seed\": 42
    },
    \"logprobs\": $logprobs,
    \"top_logprobs\": $top_logprobs
  }"

  echo "Testing: $label"

  for ((i = 1; i <= RUNS; i++)); do
    # A failed request counts as a discarded run instead of silently
    # terminating the whole script under `set -e`.
    result=$(curl -s http://localhost:11434/api/generate -d "$payload") || result=""

    # Extract timing info; fall back to 0 when jq cannot parse the response.
    eval_duration=$(jq -r '.eval_duration // 0' <<<"$result") || eval_duration=0
    eval_count=$(jq -r '.eval_count // 0' <<<"$result") || eval_count=0

    # Only accumulate positive integers: an empty or non-numeric value would
    # otherwise break the arithmetic expansion below.
    if [[ "$eval_duration" =~ ^[0-9]+$ && "$eval_count" =~ ^[0-9]+$ ]] \
      && ((eval_duration > 0 && eval_count > 0)); then
      total_time=$((total_time + eval_duration))
      total_tokens=$((total_tokens + eval_count))
      ok_runs=$((ok_runs + 1))
    fi

    # Small delay between runs
    sleep 0.5
  done

  if ((ok_runs == 0)); then
    echo " Failed to get valid results"
    echo ""
    return 0
  fi

  if ((ok_runs < RUNS)); then
    echo " Warning: only $ok_runs of $RUNS runs returned valid timings" >&2
  fi

  # Durations are in nanoseconds. Average over the successful runs only, and
  # derive the per-token figures from the raw totals so integer division does
  # not discard the sub-millisecond part.
  local avg_time_ms=$((total_time / ok_runs / 1000000))
  local avg_tokens=$((total_tokens / ok_runs))
  local tokens_per_sec=$((total_tokens * 1000000000 / total_time))
  local us_per_token=$((total_time / total_tokens / 1000))

  printf " Average time: %d ms\n" "$avg_time_ms"
  printf " Tokens: %d\n" "$avg_tokens"
  printf " Tokens/sec: %d\n" "$tokens_per_sec"
  # Printed as milliseconds with three decimals (microsecond resolution).
  printf " ms/token: %d.%03d\n" "$((us_per_token / 1000))" "$((us_per_token % 1000))"
  echo ""
}
# Make sure the requested model is listed by the local server; pull it if not.
# Output of the jq lookup is discarded, only its exit status matters.
if ! curl -s http://localhost:11434/api/tags \
  | jq -e ".models[] | select(.name == \"$MODEL\")" &> /dev/null; then
  printf '%s\n' "Model $MODEL not found. Pulling..."
  ollama pull "$MODEL"
fi

# Single-token throwaway request issued before any timed run; its response
# is discarded.
printf '%s\n' "Warming up model..."
curl -s http://localhost:11434/api/generate -d "{
\"model\": \"$MODEL\",
\"prompt\": \"test\",
\"stream\": false,
\"options\": {\"num_predict\": 1}
}" > /dev/null

printf '%s\n\n' "Starting benchmarks..."

# Reference run without logprobs, then logprobs without any top_k candidates.
benchmark false 0 "Baseline (no logprobs)"
benchmark true 0 "Logprobs only (no top_k)"

# Logprobs combined with increasing top_k values.
for top_k in 1 5 10 20; do
  benchmark true "$top_k" "Logprobs + top_k=$top_k"
done

printf '%s\n' "=== Benchmark Complete ==="
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment