Created
November 3, 2025 09:44
-
-
Save baptistejamin/6a458ea9b62a37caa5e59eb49970bb45 to your computer and use it in GitHub Desktop.
Revisions
-
baptistejamin created this gist
Nov 3, 2025. There are no files selected for viewing.
#!/bin/bash
# Benchmark script to measure the performance impact of returning logprobs
# with different top_logprobs (top-k) settings from an Ollama server.
#
# Usage: ./scripts/benchmark_logprobs.sh [model_name]
# Requires: curl, jq, and an Ollama server listening on localhost:11434.

set -euo pipefail

readonly OLLAMA_URL="http://localhost:11434"
MODEL="${1:-gemma3:1b}"
readonly PROMPT="Write a short story about a robot."
readonly NUM_PREDICT=100   # tokens to generate per run
readonly RUNS=5            # runs averaged per configuration

echo "=== Logprobs Performance Benchmark ==="
echo "Model: $MODEL"
echo "Prompt: $PROMPT"
echo "Tokens to generate: $NUM_PREDICT"
echo "Runs per configuration: $RUNS"
echo ""

#######################################
# Run one benchmark configuration.
# Globals:   OLLAMA_URL, MODEL, PROMPT, NUM_PREDICT, RUNS (read)
# Arguments: $1 - logprobs flag ("true"/"false")
#            $2 - top_logprobs value (integer)
#            $3 - human-readable label
# Outputs:   average latency/throughput stats to stdout
#######################################
benchmark() {
  local logprobs=$1
  local top_logprobs=$2
  local label=$3

  echo "Testing: $label"

  local total_time=0
  local total_tokens=0
  local i result payload eval_duration eval_count

  # Build the request body with jq so MODEL/PROMPT are JSON-escaped safely
  # (raw string interpolation breaks on quotes/backslashes in the prompt).
  payload=$(jq -n \
    --arg model "$MODEL" \
    --arg prompt "$PROMPT" \
    --argjson num_predict "$NUM_PREDICT" \
    --argjson logprobs "$logprobs" \
    --argjson top_logprobs "$top_logprobs" \
    '{model: $model, prompt: $prompt, stream: false,
      options: {num_predict: $num_predict, temperature: 0, seed: 42},
      logprobs: $logprobs, top_logprobs: $top_logprobs}')

  for i in $(seq 1 "$RUNS"); do
    result=$(curl -s "$OLLAMA_URL/api/generate" -d "$payload")

    # eval_duration is reported in nanoseconds; eval_count in tokens.
    eval_duration=$(echo "$result" | jq -r '.eval_duration // 0')
    eval_count=$(echo "$result" | jq -r '.eval_count // 0')

    if [[ "$eval_duration" != "0" && "$eval_count" != "0" ]]; then
      total_time=$((total_time + eval_duration))
      total_tokens=$((total_tokens + eval_count))
    fi

    # Small delay between runs so the server settles.
    sleep 0.5
  done

  # Averages (duration converted from nanoseconds to milliseconds).
  local avg_time_ms=$((total_time / RUNS / 1000000))
  local avg_tokens=$((total_tokens / RUNS))

  # Guard both divisors: total_time > 0 prevents a division-by-zero in the
  # tokens/sec computation when every run failed to report timing.
  if (( avg_tokens > 0 && total_time > 0 )); then
    local tokens_per_sec=$((avg_tokens * 1000000000 / (total_time / RUNS)))
    local ms_per_token=$((avg_time_ms / avg_tokens))
    printf '  Average time: %d ms\n' "$avg_time_ms"
    printf '  Tokens: %d\n' "$avg_tokens"
    printf '  Tokens/sec: %d\n' "$tokens_per_sec"
    printf '  ms/token: %d\n' "$ms_per_token"
  else
    echo "  Failed to get valid results"
  fi
  echo ""
}

# Fail fast with a clear message if the server is unreachable, instead of
# letting every later curl/jq call fail confusingly.
if ! curl -sf "$OLLAMA_URL/api/tags" > /dev/null; then
  echo "Error: cannot reach Ollama at $OLLAMA_URL" >&2
  exit 1
fi

# Pull the model if it is not already available locally.
if ! curl -s "$OLLAMA_URL/api/tags" \
    | jq -e --arg m "$MODEL" '.models[] | select(.name == $m)' > /dev/null 2>&1; then
  echo "Model $MODEL not found. Pulling..."
  ollama pull "$MODEL"
fi

echo "Warming up model..."
curl -s "$OLLAMA_URL/api/generate" -d "$(jq -n --arg model "$MODEL" \
  '{model: $model, prompt: "test", stream: false, options: {num_predict: 1}}')" \
  > /dev/null

echo "Starting benchmarks..."
echo ""

# Baseline: No logprobs
benchmark false 0 "Baseline (no logprobs)"

# With logprobs, no top_k
benchmark true 0 "Logprobs only (no top_k)"

# With logprobs and different top_k values
benchmark true 1 "Logprobs + top_k=1"
benchmark true 5 "Logprobs + top_k=5"
benchmark true 10 "Logprobs + top_k=10"
benchmark true 20 "Logprobs + top_k=20"

echo "=== Benchmark Complete ==="