Last active
October 25, 2025 07:46
-
-
Save mchiang0610/fd7ec5d4aa776de1b07bc57de3e10b15 to your computer and use it in GitHub Desktop.
Revisions
-
mchiang0610 revised this gist
Oct 22, 2025 . 2 changed files with 16261 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -10,7 +10,7 @@ pip install ollama Running the benchmark: ``` echo <model> | python benchmark.py -k0.2 -c10 -n500 --temperature 0 -p "write an in-depth summary of this story: $(head -n200 pg98.txt)" > dgx-model.bench ``` To examine the benchmarks use golang's benchstat: -
mchiang0610 created this gist
Oct 17, 2025 .There are no files selected for viewing
# /// script
# dependencies = [
#   "ollama",
# ]
# ///
"""Benchmark Ollama models and print results that benchstat can read.

Installation:

    go install golang.org/x/perf/cmd/benchstat@latest
    python3 -m venv venv
    source venv/bin/activate
    pip install ollama

Running the benchmark:

    echo <model> | python benchmark.py -k0.2 -c10 -n500 --temperature 0 -p "write me a short story" > dgx-model.bench

To examine the benchmarks use golang's benchstat:

    ~/go/bin/benchstat dgx-model.bench
"""
import os
import sys
import time
from argparse import ArgumentParser, FileType

import ollama


def _print_step(model, step, count, duration):
    """Print per-token latency and throughput for one step of a request.

    ``count`` is the number of tokens and ``duration`` the time spent on
    them (reported as ns/token, so nanoseconds are assumed).  When the
    response carries no usable count or duration the line is skipped and a
    note goes to stderr, instead of crashing on a division by zero/None or
    polluting the benchmark output on stdout.
    """
    if not count or duration is None:
        print(
            f"skipping name={model}/step={step}: "
            f"count={count!r} duration={duration!r}",
            file=sys.stderr,
        )
        return

    print(
        f"BenchmarkModel/name={model}/step={step}",
        count,
        duration / count,
        "ns/token",
        # The epsilon keeps a zero duration from dividing by zero.
        count / (duration + 1e-12) * 1e9,
        "token/sec",
    )


def _print_request(model, suffix, duration):
    """Print one whole-request duration line; skip it when not reported."""
    if duration is None:
        print(
            f"skipping name={model}{suffix}: duration not reported",
            file=sys.stderr,
        )
        return

    print(
        f"BenchmarkModel/name={model}{suffix}",
        1,
        duration,
        "ns/request",
    )


def main():
    """Run the chat benchmark for every model name read from the input.

    Model names are read one per line from the positional ``input`` file
    (stdin by default).  Each model is queried ``--count`` times with the
    same prompt, and the prefill, generate, total and load timings of each
    response are printed to stdout.
    """
    parser = ArgumentParser()
    parser.add_argument("-c", "--count", type=int, default=1)
    parser.add_argument("-n", "--max-new-tokens", type=int, default=100)
    parser.add_argument("-p", "--prompt", type=str, default="Write a long story.")
    parser.add_argument("-k", "--keep-alive", type=float, default=None)
    parser.add_argument("--temperature", type=float, default=None)
    parser.add_argument("input", type=FileType("r"), default=sys.stdin, nargs="?")

    args = parser.parse_args()

    # NOTE: os.uname() is only available on Unix-like systems.
    uname = os.uname()
    # TODO: metadata should be retrieved from device under test
    print("sysname:", uname.sysname)
    print("machine:", uname.machine)

    for line in args.input:
        model = line.strip()
        if not model:
            # Blank lines (e.g. a trailing newline) are not model names.
            continue

        for _ in range(args.count):
            response = ollama.chat(
                model=model,
                messages=[{"role": "user", "content": args.prompt}],
                options=ollama.Options(
                    num_predict=args.max_new_tokens,
                    temperature=args.temperature,
                    # A seed is only pinned when a temperature was given.
                    seed=0 if args.temperature is not None else None,
                ),
                keep_alive=args.keep_alive,
            )

            _print_step(
                model,
                "prefill",
                response.prompt_eval_count,
                response.prompt_eval_duration,
            )
            _print_step(
                model,
                "generate",
                response.eval_count,
                response.eval_duration,
            )

            _print_request(model, "", response.total_duration)
            _print_request(model, "/step=load", response.load_duration)

            # NOTE(review): wait slightly longer than keep_alive before the
            # next request -- presumably so the model is unloaded and the
            # next run measures a fresh load.  Nesting inside the per-request
            # loop is assumed; confirm against the original file.
            if args.keep_alive:
                time.sleep(args.keep_alive + 0.2)


if __name__ == "__main__":
    main()