...
```bash
sudo apt update
sudo apt upgrade
sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt install python3.11 -y
sudo apt install python3.11-venv -y
python3.11 -V
python3.11 -m venv llm_env
source llm_env/bin/activate

pip install --pre --upgrade ipex-llm[cpp]

mkdir llama-cpp
cd llama-cpp
init-ollama   # creates the ollama binary links in this directory (ipex-llm quickstart step)

# Run Ollama Serve with Intel GPU
export OLLAMA_NUM_GPU=999
export no_proxy=localhost,127.0.0.1
export ZES_ENABLE_SYSMAN=1
source /opt/intel/oneapi/setvars.sh
export SYCL_CACHE_PERSISTENT=1

# localhost access
# ./ollama serve

# for non-localhost access
OLLAMA_HOST=0.0.0.0 ./ollama serve
```
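With the server running, models can be pulled and queried over Ollama's standard REST API. A minimal sketch, assuming the server is reachable on the default port 11434 and using the public Ollama library tags for the two models benchmarked below (`deepseek-r1:70b`, `qwen3:32b`); adjust the tag to match your setup:

```bash
# Pull one of the models benchmarked below (public Ollama tag; assumption)
./ollama pull deepseek-r1:70b

# Send a single prompt via the REST API (default port 11434)
curl http://localhost:11434/api/generate -d '{
  "model": "deepseek-r1:70b",
  "prompt": "Explain SYCL in one sentence.",
  "stream": false
}'
```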
...
| Model | Load time (s) | Layers offloaded to GPU |
|---|---|---|
| DeepSeek R1 Distill Llama 70B | 54.25 | 81/81 |
| Qwen3 32B | 28.04 | 65/65 |
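The load times and layer counts above can be read from the serve output: llama.cpp logs an `offloaded N/N layers to GPU` line when a model loads. A small sketch, assuming the serve output is redirected to `ollama.log` (a hypothetical filename):

```bash
# Run the server in the background and capture its log
OLLAMA_HOST=0.0.0.0 ./ollama serve > ollama.log 2>&1 &

# After loading a model, check how many layers landed on the GPU
grep "offloaded" ollama.log
```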
llama.cpp
https://github.com/ggml-org/llama.cpp
...