With Power Limits 95/110, Ubuntu 24.04, Results: Test 1 Intel Core Ultra 9 185H,
Crucial 128GB (2x64GB) 5600MT/s DDR5 SODIMM, WD_BLACK SN850x 8TB
GPU Version: intel-ollama-0.6.2 GPU SYCL0 (Intel(R) Arc(TM) Graphics) - 120187 MiB
CPU Version: ollama version is 0.7.0 CPU 123.5 GiB available
...
orca-mini:3b...
2.0...
orca-mini:7b...
orca-mini:13b...
orca-mini:70b...
185H CPU vs GPU ollama models speed
Size of Models
...
| Code Block |
|---|
root@server1:~# ollama list NAME ID SIZE MODIFIED gemma3:12b f4031aab637d 8.1 GB 19 minutes ago gemma3:4b a2af6cc3eb7f 3.3 GB 21 minutes ago gemma3:1b 8648f39daa8f 815 MB 24 minutes ago orca-mini:3b 2dbd9f439647 2.0 GB 2 hours ago orca-mini:7b 9c9618e2e895 3.8 GB 2 hours ago orca-mini:13b 1b4877c90807 7.4 GB 2 hours ago orca-mini:70b f184c0860491 38 GB 2 hours ago phi4:14b-q4_K_M ac896e5b8b34 9.1 GB 14 hours ago phi4-mini:3.8b-q4_K_M 78fad5d182a7 2.5 GB 14 hours ago phi4:14b-fp16 227695f919b5 29 GB 17 hours ago openthinker:32b-v2-fp16 bedb555dcf18 65 GB 18 hours ago openthinker:32b 04b5937dcb16 19 GB 18 hours ago dolphin-phi:2.7b c5761fc77240 1.6 GB 21 hours ago dolphin3:8b d5ab9ae8e1f2 4.9 GB 21 hours ago tinyllama:1.1b 2644915ede35 637 MB 21 hours ago deepseek-v2:16b 7c8c332f2df7 8.9 GB 38 hours ago phi3:14b cf611a26b048 7.9 GB 40 hours ago llama3.3:70b a6eb4748fd29 42 GB 40 hours ago mistral-small3.1:24b b9aaf0c2586a 15 GB 40 hours ago llama4:scout 4f01ed6b6e01 67 GB 41 hours ago openchat:7b 537a4e03b649 4.1 GB 41 hours ago qwen3:32b e1c9f234c6eb 20 GB 42 hours ago gemma3:27b a418f5838eaf 17 GB 42 hours ago deepseek-r1:70b 0c1615a8ca32 42 GB 43 hours ago |
...
| Code Block | ||||
|---|---|---|---|---|
| ||||
top - 14:20:49 up 2:14, 4 users, load average: 1.75, 2.91, 2.01
Tasks: 344 total, 2 running, 342 sleeping, 0 stopped, 0 zombie
%Cpu0 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu1 : 0.0 us, 0.0 sy, 0.0 ni, 0.3 id, 99.7 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu2 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu3 : 44.0 us, 56.0 sy, 0.0 ni, 0.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu4 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu5 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu6 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu7 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu8 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu9 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu10 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu11 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu12 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu13 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu14 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu15 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu16 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu17 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu18 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu19 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu20 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
%Cpu21 : 0.0 us, 0.0 sy, 0.0 ni,100.0 id, 0.0 wa, 0.0 hi, 0.0 si, 0.0 st
MiB Mem : 54.3/128337.6 [||||||||||||||||||||||||||||||||||||||||||||||||||||||| ]
MiB Swap: 0.0/8192.0 [ ]
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
68407 root 20 0 4472460 1.3g 369680 R 100.3 1.0 2:31.49 ollama-lib
1 root 20 0 22136 12508 9340 S 0.0 0.0 0:00.81 systemd
2 root 20 0 0 0 0 S 0.0 0.0 0:00.00 kthreadd
3 root 20 0 0 0 0 S 0.0 0.0 0:00.00 pool_workqueue_release
|
script
1 root 20 0 22136 12508 9340 S 0.0 0.0 0:00.81 systemd
2 root 20 0 0 0 0 S 0.0 0.0 0:00.00 kthreadd
3 root 20 0 0 0 0 S 0.0 0.0 0:00.00 pool_workqueue_release
|
script
| Code Block | ||||
|---|---|---|---|---|
| ||||
#!/bin/bash
# Benchmark using ollama gives rate of tokens per second
# idea taken from https://taoofmac.com/space/blog/2024/01/20/1800
# batch-obench.sh script is modification of obench.sh from https://github.com/tabletuser-blogspot/ollama-benchmark
# done by liutyi for https://wiki.liutyi.info test
set -e
# ANSI color codes used to highlight parts of the benchmark output.
borange='\e[0;33m'
yellow='\e[1;33m'
purple='\e[0;35m'
green='\e[0;32m'
red='\e[0;31m'
blue='\e[0;34m'
NC='\e[0m' # No Color
# Remember the current governor so it can be restored after the run.
cpu_def=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)
echo "Setting cpu governor to"
# Fix: 'sudo echo' was useless — the privilege is needed by the write,
# which 'sudo tee' already performs; plain echo feeds the pipe.
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
# First display adapter reported by lshw; used only to label the output.
gpu_avail=$(sudo lshw -C display | grep product: | head -1 | cut -c17-)
cpugover=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)
# CPU model string; awk '{$1=$1}1' squeezes the surrounding whitespace.
cpu_used=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1')
echo ""
echo "Simple benchmark using ollama and"
echo "whatever local Model is installed."
echo "Does not identify if $gpu_avail is benchmarking"
echo ""
# Number of timed runs per model.
benchmark=3
echo "How many times to run the benchmark?"
echo "$benchmark"
echo ""
for model in `ollama ls |awk '{print $1}'|grep -v NAME`; do
echo -e "Total runs "${purple}$benchmark${NC}
echo ""
echo ""
echo $model
ollama show $model --system
| Code Block | ||||
| ||||
#!/bin/bash
# Benchmark using ollama gives rate of tokens per second
# idea taken from https://taoofmac.com/space/blog/2024/01/20/1800
# batch-obench.sh script is modification of obench.sh from https://github.com/tabletuser-blogspot/ollama-benchmark
# done by liutyi for https://wiki.liutyi.info test
set -e
borange='\e[0;33m'
yellow='\e[1;33m'
purple='\e[0;35m'
green='\e[0;32m'
red='\e[0;31m'
blue='\e[0;34m'
NC='\e[0m' # No Color
cpu_def=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)
echo "Setting cpu governor to"
sudo echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
gpu_avail=$(sudo lshw -C display | grep product: | head -1 | cut -c17-)
cpugover=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)
cpu_used=$(lscpu | grep 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1')
echo ""
echo "Simple benchmark using ollama and"
echo "whatever local Model is installed."
echo "Does not identify if $gpu_avail is benchmarking"
echo ""
benchmark=3
echo "How many times to run the benchmark?"
echo $benchmark
echo ""
for model in `ollama ls |awk '{print $1}'|grep -v NAME`; do
echo -e "Total runs "${purple}$benchmark${NC}
echo ""
echo ""
echo $model
ollama show $model --system
echo "" | tee -a results.txt
echo -e "Will use model: "${green}$model${NC} | tee -a results.txt
echo "" | tee -a results.txt
echo -e Will benchmark the tokens per second for ${cpu_used} and or ${gpu_avail} | tee -a results.txt
echo "" | tee -a results.txt
echo "" | tee -a results.txt
echo -e Running benchmark ${purple}$benchmark${NC} times for ${cpu_used} and or ${gpu_avail} | tee -a results.txt
echo -e with ${borange}$cpugover${NC} setting for cpu governor | tee -a results.txt
echo "" | tee -a results.txt
for run in $(seq 1 $benchmark); do
echo "Why is the blue sky blue?" | ollama run $model --verbose 2>&1 >/dev/null | grep "eval rate:" | tee -a results.txt
avg=$(cat results.txt | grep -v "prompt eval rate:" |tail -n $benchmark | awk '{print $3}' | awk 'NR>1{ tot+=$1 } END{ print tot/(NR-1) }')
done
echo "" | tee -a results.txt
echo -e ${red}$avg${NC} is the average ${blue}tokens per second${NC} using ${green}$model${NC} model | tee -a results.txt
echo for $cpu_used and or $gpu_avail | tee -a results.txt
done
echo
echo -e using ${borange}$cpugover${NC} for cpu governor.
echo ""
echo "Setting cpu governor to"
sudo echo $cpu_def | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
echo .
|
...
| Code Block | ||||
|---|---|---|---|---|
| ||||
Old quant types (some base model types require these):
- Q4_0: small, very high quality loss - legacy, prefer using Q3_K_M
- Q4_1: small, substantial quality loss - legacy, prefer using Q3_K_L
- Q5_0: medium, balanced quality - legacy, prefer using Q4_K_M
- Q5_1: medium, low quality loss - legacy, prefer using Q5_K_M
New quant types (recommended):
- Q2_K: smallest, extreme quality loss - not recommended
- Q3_K: alias for Q3_K_M
- Q3_K_S: very small, very high quality loss
- Q3_K_M: very small, very high quality loss
- Q3_K_L: small, substantial quality loss
- Q4_K: alias for Q4_K_M
- Q4_K_S: small, significant quality loss
- Q4_K_M: medium, balanced quality - recommended
- Q5_K: alias for Q5_K_M
- Q5_K_S: large, low quality loss - recommended
- Q5_K_M: large, very low quality loss - recommended
- Q6_K: very large, extremely low quality loss
- Q8_0: very large, extremely low quality loss - not recommended
- F16: extremely large, virtually no quality loss - not recommended
- F32: absolutely huge, lossless - not recommended
echo "" | tee -a results.txt
echo -e Running benchmark ${purple}$benchmark${NC} times for ${cpu_used} and or ${gpu_avail} | tee -a results.txt
echo -e with ${borange}$cpugover${NC} setting for cpu governor | tee -a results.txt
echo "" | tee -a results.txt
for run in $(seq 1 $benchmark); do
echo "Why is the blue sky blue?" | ollama run $model --verbose 2>&1 >/dev/null | grep "eval rate:" | tee -a results.txt
avg=$(cat results.txt | grep -v "prompt eval rate:" |tail -n $benchmark | awk '{print $3}' | awk 'NR>1{ tot+=$1 } END{ print tot/(NR-1) }')
done
echo "" | tee -a results.txt
echo -e ${red}$avg${NC} is the average ${blue}tokens per second${NC} using ${green}$model${NC} model | tee -a results.txt
echo for $cpu_used and or $gpu_avail | tee -a results.txt
done
echo
echo -e using ${borange}$cpugover${NC} for cpu governor.
echo ""
echo "Setting cpu governor to"
sudo echo $cpu_def | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
echo .
|