    #!/bin/bash
# Benchmark script to measure the performance impact of logprobs with different top_logprobs settings
    # Usage: ./scripts/benchmark_logprobs.sh [model_name]
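#
# Requires curl, jq, and an Ollama server listening on localhost:11434.
# Note: the "logprobs"/"top_logprobs" request fields assume an Ollama build
# with logprobs support (e.g. the feature branch under test); a build without
# it may silently ignore them, in which case every configuration measures
# the same baseline.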

    set -e
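
# Fail fast if a required tool is missing (a small convenience guard,
# not part of the benchmark logic itself).
for dep in curl jq; do
    command -v "$dep" >/dev/null 2>&1 || { echo "Missing dependency: $dep" >&2; exit 1; }
done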

    MODEL="${1:-gemma3:1b}"
    PROMPT="Write a short story about a robot."
    NUM_PREDICT=100
    RUNS=5
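# Each configuration is averaged over RUNS runs; temperature 0 and a fixed
# seed (set per-request below) keep generations deterministic, so timing
# differences between configurations should reflect logprobs overhead rather
# than output variance.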

    echo "=== Logprobs Performance Benchmark ==="
    echo "Model: $MODEL"
    echo "Prompt: $PROMPT"
    echo "Tokens to generate: $NUM_PREDICT"
    echo "Runs per configuration: $RUNS"
    echo ""

    # Function to run benchmark
benchmark() {
    local logprobs=$1
    local top_logprobs=$2
    local label=$3

    echo "Testing: $label"

    local total_time=0
    local total_tokens=0

    for i in $(seq 1 $RUNS); do
        result=$(curl -s http://localhost:11434/api/generate -d "{
            \"model\": \"$MODEL\",
            \"prompt\": \"$PROMPT\",
            \"stream\": false,
            \"options\": {
                \"num_predict\": $NUM_PREDICT,
                \"temperature\": 0,
                \"seed\": 42
            },
            \"logprobs\": $logprobs,
            \"top_logprobs\": $top_logprobs
        }")

        # Extract timing info
        eval_duration=$(echo "$result" | jq -r '.eval_duration // 0')
        eval_count=$(echo "$result" | jq -r '.eval_count // 0')

        if [ "$eval_duration" != "0" ] && [ "$eval_count" != "0" ]; then
            total_time=$((total_time + eval_duration))
            total_tokens=$((total_tokens + eval_count))
        fi

        # Small delay between runs
        sleep 0.5
    done

    # Calculate averages (duration is in nanoseconds)
    avg_time_ms=$((total_time / RUNS / 1000000))
    avg_tokens=$((total_tokens / RUNS))

    if [ "$avg_tokens" -gt 0 ]; then
        tokens_per_sec=$((avg_tokens * 1000000000 / (total_time / RUNS)))
        ms_per_token=$((avg_time_ms / avg_tokens))

        printf "  Average time: %d ms\n" "$avg_time_ms"
        printf "  Tokens: %d\n" "$avg_tokens"
        printf "  Tokens/sec: %d\n" "$tokens_per_sec"
        printf "  ms/token: %d\n" "$ms_per_token"
    else
        echo "  Failed to get valid results"
    fi

    echo ""
}

    # Check if model exists
if ! curl -s http://localhost:11434/api/tags | jq -e ".models[] | select(.name == \"$MODEL\")" > /dev/null 2>&1; then
    echo "Model $MODEL not found. Pulling..."
    ollama pull "$MODEL"
fi

    echo "Warming up model..."
    curl -s http://localhost:11434/api/generate -d "{
    \"model\": \"$MODEL\",
    \"prompt\": \"test\",
    \"stream\": false,
    \"options\": {\"num_predict\": 1}
    }" > /dev/null

    echo "Starting benchmarks..."
    echo ""

    # Baseline: No logprobs
    benchmark false 0 "Baseline (no logprobs)"

# With logprobs, no top_logprobs
benchmark true 0 "Logprobs only (top_logprobs=0)"

# With logprobs and different top_logprobs values
benchmark true 1 "Logprobs + top_logprobs=1"
benchmark true 5 "Logprobs + top_logprobs=5"
benchmark true 10 "Logprobs + top_logprobs=10"
benchmark true 20 "Logprobs + top_logprobs=20"

    echo "=== Benchmark Complete ==="