Created
November 3, 2025 09:44
-
-
Save baptistejamin/6a458ea9b62a37caa5e59eb49970bb45 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Benchmark script to measure performance impact of logprobs with different topK settings
# Usage: ./scripts/benchmark_logprobs.sh [model_name]

# Strict mode: abort on command failure, unset variables, and pipeline failures.
set -euo pipefail

# Model under test; override with the first positional argument.
MODEL="${1:-gemma3:1b}"
# Fixed benchmark parameters — identical across all configurations so the
# only variable between runs is the logprobs/top_logprobs settings.
readonly PROMPT="Write a short story about a robot."
readonly NUM_PREDICT=100   # tokens to generate per request
readonly RUNS=5            # samples per configuration

echo "=== Logprobs Performance Benchmark ==="
echo "Model: $MODEL"
echo "Prompt: $PROMPT"
echo "Tokens to generate: $NUM_PREDICT"
echo "Runs per configuration: $RUNS"
echo ""
#######################################
# Run one benchmark configuration and print average timing stats.
# Globals:   MODEL, PROMPT, NUM_PREDICT, RUNS (read)
# Arguments: $1 - logprobs flag ("true"/"false") for the request body
#            $2 - top_logprobs value (integer)
#            $3 - human-readable label for this configuration
# Outputs:   per-configuration stats to stdout
#######################################
benchmark() {
  local logprobs=$1
  local top_logprobs=$2
  local label=$3

  echo "Testing: $label"

  local total_time=0
  local total_tokens=0
  local ok_runs=0   # only successful runs count toward the averages
  local i result eval_duration eval_count

  for ((i = 1; i <= RUNS; i++)); do
    # '|| result=""' keeps a transient curl failure from killing the whole
    # script under `set -e`; the run is simply recorded as failed below.
    result=$(curl -s http://localhost:11434/api/generate -d "{
      \"model\": \"$MODEL\",
      \"prompt\": \"$PROMPT\",
      \"stream\": false,
      \"options\": {
        \"num_predict\": $NUM_PREDICT,
        \"temperature\": 0,
        \"seed\": 42
      },
      \"logprobs\": $logprobs,
      \"top_logprobs\": $top_logprobs
    }") || result=""

    # Extract timing info; per the API, eval_duration is in nanoseconds.
    eval_duration=$(printf '%s' "$result" | jq -r '.eval_duration // 0' 2>/dev/null) || eval_duration=0
    eval_count=$(printf '%s' "$result" | jq -r '.eval_count // 0' 2>/dev/null) || eval_count=0

    if [ -n "$eval_duration" ] && [ -n "$eval_count" ] \
        && [ "$eval_duration" != "0" ] && [ "$eval_count" != "0" ]; then
      total_time=$((total_time + eval_duration))
      total_tokens=$((total_tokens + eval_count))
      ok_runs=$((ok_runs + 1))
    fi

    # Small delay between runs so back-to-back requests don't interfere
    sleep 0.5
  done

  if [ "$ok_runs" -gt 0 ] && [ "$total_tokens" -gt 0 ]; then
    # Average over the runs that actually succeeded (the original divided
    # by RUNS, which skewed the averages low whenever a run failed).
    local avg_time_ns=$((total_time / ok_runs))
    local avg_time_ms=$((avg_time_ns / 1000000))
    local avg_tokens=$((total_tokens / ok_runs))
    local tokens_per_sec=$((avg_tokens * 1000000000 / avg_time_ns))
    local ms_per_token=$((avg_time_ms / avg_tokens))
    printf '  Average time: %d ms\n' "$avg_time_ms"
    printf '  Tokens: %d\n' "$avg_tokens"
    printf '  Tokens/sec: %d\n' "$tokens_per_sec"
    printf '  ms/token: %d\n' "$ms_per_token"
  else
    echo "  Failed to get valid results"
  fi
  echo ""
}
# Fail fast with a clear message if the Ollama server is unreachable.
# (Without this check, a down server made the model-existence test below
# fail silently and the script wrongly fell through to `ollama pull`.)
if ! curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
  echo "Error: cannot reach Ollama at http://localhost:11434 — is the server running?" >&2
  exit 1
fi

# Pull the model if it is not already available locally.
if ! curl -s http://localhost:11434/api/tags \
    | jq -e ".models[] | select(.name == \"$MODEL\")" > /dev/null 2>&1; then
  echo "Model $MODEL not found. Pulling..."
  ollama pull "$MODEL"
fi

# One throwaway generation so model load time doesn't pollute the first run.
echo "Warming up model..."
curl -s http://localhost:11434/api/generate -d "{
  \"model\": \"$MODEL\",
  \"prompt\": \"test\",
  \"stream\": false,
  \"options\": {\"num_predict\": 1}
}" > /dev/null

echo "Starting benchmarks..."
echo ""

# Baseline: No logprobs
benchmark false 0 "Baseline (no logprobs)"
# With logprobs, no top_k
benchmark true 0 "Logprobs only (no top_k)"
# With logprobs and different top_k values
benchmark true 1 "Logprobs + top_k=1"
benchmark true 5 "Logprobs + top_k=5"
benchmark true 10 "Logprobs + top_k=10"
benchmark true 20 "Logprobs + top_k=20"

echo "=== Benchmark Complete ==="
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment