Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
# Set up Python 3.11 + ipex-llm (Intel GPU backend) and launch Ollama serve.
sudo apt update
sudo apt upgrade -y                       # -y: non-interactive, don't block on prompt
sudo add-apt-repository -y ppa:deadsnakes/ppa
sudo apt install python3.11 -y
sudo apt install python3.11-venv -y
python3.11 -V
python3.11 -m venv llm_env
source llm_env/bin/activate
# Quote the extras spec: unquoted, [cpp] is a shell glob pattern and can
# match a local file or fail under zsh/failglob.
pip install --pre --upgrade "ipex-llm[cpp]"
mkdir -p llama-cpp                        # -p: idempotent on re-run
cd llama-cpp || exit 1                    # never continue in the wrong directory
# Run Ollama Serve with Intel GPU
# Offload all model layers to the GPU; ipex-llm docs use 999 ("all layers").
# NOTE(review): original read 999128 — looks like a paste artifact; confirm.
export OLLAMA_NUM_GPU=999
export no_proxy=localhost,127.0.0.1
export ZES_ENABLE_SYSMAN=1                # enable Level Zero sysman (GPU mgmt)
source /opt/intel/oneapi/setvars.sh       # load oneAPI toolchain env
export SYCL_CACHE_PERSISTENT=1            # persist SYCL JIT cache across runs
# localhost access
# ./ollama serve
# for non-localhost access
OLLAMA_HOST=0.0.0.0 ./ollama serve

...


Model                           Sec to load model   Layers on GPU
DeepSeek R1 Distill Llama 70B   54.25               81/81
Qwen3 32B                       28.04               65/65

llama.cpp

https://github.com/ggml-org/llama.cpp

...