...
| Model | Params, B | Size, GB | Prompt eval rate, t/s (CPU) | Prompt eval rate, t/s (GPU) | Second prompt eval rate, t/s (CPU) | Second prompt eval rate, t/s (GPU) | Eval rate, t/s (CPU) | Eval rate, t/s (GPU) |
|---|---|---|---|---|---|---|---|---|
| gemma3:12b | 12 | 8.1 | 15.73 | - | 109.35 | - | 6.55 | - |
| gemma3:4b | 4 | 3.3 | 48.55 | - | 284.47 | - | 17.88 | - |
| gemma3:1b | 1 | 0.8 | 140.40 | - | 177.24 | - | 45.26 | - |
| orca-mini:3b | 3 | 2.0 | 68.21 | - | 1049.24 | - | 21.91 | - |
| orca-mini:7b | 7 | 3.8 | 33.20 | - | 576.83 | - | 11.76 | - |
| orca-mini:13b | 13 | 7.4 | 17.27 | - | 307.94 | - | 6.23 | - |
| orca-mini:70b | 70 | 38 | 3.28 | - | 62.05 | - | 1.24 | - |
| phi4:14b-q4_K_M | 14 | 9.1 | 12.98 | 9.87 | 100.02 | 113.45 | 5.78 | 6.77 |
| phi4-mini:3.8b-q4_K_M | 3.8 | 2.5 | 49.26 | - | 186.63 | - | 18.83 | - |
| phi4:14b-fp16 | 14 | 29 | 12.45 | 11.53 | 35.53 | 40.16 | 2.08 | 2.09 |
| openthinker:32b-v2-fp16 | 32 | 65 | 4.45 | 7.31 | 20.24 | 22.31 | 0.81 | 0.90 |
| openthinker:32b | 32 | 19 | 5.93 | 4.59 | 66.31 | 69.74 | 2.60 | 2.78 |
| dolphin-phi:2.7b | 2.7 | 1.6 | 85.67 | 86.81 | 744.07 | 649.43 | 25.42 | 21.73 |
| dolphin3:8b | 8 | 4.9 | 26.04 | 30.97 | 325.85 | 373.30 | 10.76 | 12.58 |
| tinyllama:1.1b | 1.1 | 0.6 | 198.18 | 112.98 | 2595.12 | 2211.21 | 62.99 | 57.53 |
| deepseek-v2:16b | 16 | 8.9 | 59.47 | 15.83 | 361.51 | 175.02 | 24.39 | 12.00 |
| phi3:14b | 14 | 7.9 | 15.60 | 10.51 | 101.53 | 128.59 | 6.07 | 7.67 |
| llama3.3:70b | 70 | 42 | 2.60 | 1.54 | 21.35 | 23.37 | 1.25 | 1.37 |
| mistral-small3.1:24b | 24 | 15 | 7.71 | - | 1321.32 | - | 3.64 | - |
| llama4:scout | 17 | 67 | 11.14 | - | 1683.33 | - | 4.81 | - |
| openchat:7b | 7 | 4.1 | 30.47 | 27.15 | 273.39 | 361.04 | 11.10 | 14.81 |
| qwen3:32b | 32 | 20 | 5.67 | 2.84 | 38.88 | 41.60 | 2.53 | 2.73 |
| gemma3:27b | 27 | 17 | 6.60 | - | 49.38 | - | 3.04 | - |
| deepseek-r1:70b | 70 | 42 | 2.63 | 0.89 | 12.39 | 14.13 | 1.24 | 1.38 |
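All rates in the table are tokens per second as printed by Ollama's `--verbose` output. They can be recomputed from the raw counters the server returns: the non-streaming `/api/generate` response includes `prompt_eval_count`/`prompt_eval_duration` and `eval_count`/`eval_duration`, with durations in nanoseconds. A minimal sketch (the sample numbers below are illustrative, not taken from the table):

```python
def tokens_per_second(count: int, duration_ns: int) -> float:
    """Convert Ollama's token count + nanosecond duration into tokens/s."""
    return count / duration_ns * 1e9

# Illustrative response fragment; field names match Ollama's
# non-streaming /api/generate JSON reply.
resp = {
    "prompt_eval_count": 26,
    "prompt_eval_duration": 130_000_000,   # 0.13 s to process the prompt
    "eval_count": 290,
    "eval_duration": 4_768_000_000,        # ~4.77 s to generate the reply
}

prompt_rate = tokens_per_second(resp["prompt_eval_count"],
                                resp["prompt_eval_duration"])
eval_rate = tokens_per_second(resp["eval_count"], resp["eval_duration"])
print(f"prompt eval rate: {prompt_rate:.2f} t/s, eval rate: {eval_rate:.2f} t/s")
```

The "second prompt" column benefits from prompt caching: on a repeated request most of the prompt is already processed, so the reported prompt eval rate is much higher than on the first run.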
```
root@server1:~/ollama-benchmark# ollama list
NAME                       ID              SIZE      MODIFIED
gemma3:12b                 f4031aab637d    8.1 GB    19 minutes ago
gemma3:4b                  a2af6cc3eb7f    3.3 GB    21 minutes ago
gemma3:1b                  8648f39daa8f    815 MB    24 minutes ago
orca-mini:3b               2dbd9f439647    2.0 GB    192 secondshours ago
orca-mini:7b               9c9618e2e895    3.8 GB    52 minuteshours ago
orca-mini:13b              1b4877c90807    7.4 GB    82 minuteshours ago
orca-mini:70b              f184c0860491    38 GB     122 minuteshours ago
phi4:14b-q4_K_M            ac896e5b8b34    9.1 GB    1214 hours ago
phi4-mini:3.8b-q4_K_M      78fad5d182a7    2.5 GB    1214 hours ago
phi4:14b-fp16              227695f919b5    29 GB     1517 hours ago
openthinker:32b-v2-fp16    bedb555dcf18    65 GB     1618 hours ago
openthinker:32b            04b5937dcb16    19 GB     1618 hours ago
dolphin-phi:2.7b           c5761fc77240    1.6 GB    1921 hours ago
dolphin3:8b                d5ab9ae8e1f2    4.9 GB    1921 hours ago
tinyllama:1.1b             2644915ede35    637 MB    1921 hours ago
deepseek-v2:16b            7c8c332f2df7    8.9 GB    3638 hours ago
phi3:14b                   cf611a26b048    7.9 GB    3840 hours ago
llama3.3:70b               a6eb4748fd29    42 GB     3940 hours ago
mistral-small3.1:24b       b9aaf0c2586a    15 GB     3940 hours ago
llama4:scout               4f01ed6b6e01    67 GB     3941 hours ago
openchat:7b                537a4e03b649    4.1 GB    4041 hours ago
qwen3:32b                  e1c9f234c6eb    20 GB     4142 hours ago
gemma3:27b                 a418f5838eaf    17 GB     4142 hours ago
deepseek-r1:70b            0c1615a8ca32    42 GB     4243 hours ago
```
Switch to GPU
```
systemctl stop ollama.service
source llm_env/bin/activate
pip install --pre --upgrade ipex-llm[cpp]
cd llama-cpp

# Run Ollama Serve with Intel GPU
export OLLAMA_NUM_GPU=999
export OLLAMA_THREADS=22
export OMP_NUM_THREADS=22
export ZES_ENABLE_SYSMAN=1
export no_proxy=localhost,127.0.0.1
source /opt/intel/oneapi/setvars.sh
export SYCL_CACHE_PERSISTENT=1
OLLAMA_HOST=0.0.0.0 ./ollama serve
```
...