Page History

...

Code Block

#!/bin/bash
# Benchmark using ollama; reports the eval rate in tokens per second
# idea taken from https://taoofmac.com/space/blog/2024/01/20/1800
# other colors
#Black          0;30    Dark Gray       1;30
#Red            0;31    Light Red       1;31
#Green          0;32    Light Green   1;32
#Brown/Orange 0;33      Yellow          1;33
#Blue           0;34    Light Blue      1;34
#Purple         0;35    Light Purple  1;35
#Cyan           0;36    Light Cyan      1;36
#Light Gray   0;37      White           1;37
#ANSI option
#RED='\033[0;31m'
#NC='\033[0m' # No Color
#echo -e "${RED}Hello Stackoverflow${NC}"
#set -e used for troubleshooting
# Abort on the first command that fails.
set -e

# ANSI colour escape sequences; they are interpreted later by `echo -e`.
borange='\e[0;33m'
yellow='\e[1;33m'
purple='\e[0;35m'
green='\e[0;32m'
red='\e[0;31m'
blue='\e[0;34m'
NC='\e[0m' # No Color

# Remember the governor cpu0 is using right now so it can be put back later.
cpu_def=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)

# Write the saved governor back to every core.
# Registered as an EXIT trap so the machine is not left on "performance"
# when `set -e` aborts the script part-way through the benchmark.
# Best effort: a failure here is reported on stderr but does not mask the
# script's own exit status.
restore_cpu_governor() {
  echo "$cpu_def" \
    | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor >/dev/null \
    || echo "warning: could not restore cpu governor to $cpu_def" >&2
}
trap restore_cpu_governor EXIT

echo "Setting cpu governor to"
# Only tee needs root (it is the process opening the sysfs files); tee also
# copies the value to stdout, which completes the message printed above.
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

# First "product:" entry lshw reports for display-class hardware. The label is
# stripped by pattern rather than by column so the indentation depth of the
# lshw entry does not matter.
gpu_avail=$(sudo lshw -C display | sed -n 's/^[[:space:]]*product:[[:space:]]*//p' | head -n 1)

# Governor actually in effect after the switch above.
cpugover=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)

# CPU model string: first matching lscpu row only, text after the colon,
# with surrounding/repeated whitespace squeezed by awk.
cpu_used=$(lscpu | grep -m 1 'Model name' | cut -f 2 -d ":" | awk '{$1=$1}1')

echo ""
echo "Simple benchmark using ollama and"
echo "whatever local Model is installed."
echo "Does not identify if $gpu_avail is benchmarking"
echo ""

# Number of prompts sent to each model.
benchmark=3
echo "How many times to run the benchmark?"
echo "$benchmark"
echo ""
# Print the mean of the numbers read from stdin (one per line, blank lines
# ignored).
# When more than one sample is supplied the first one is left out of the
# mean, matching the original NR>1 / (NR-1) arithmetic (presumably to discard
# a warm-up run -- TODO confirm intent). A single sample is reported as-is
# instead of dividing by zero, and no samples at all yields "n/a".
average_eval_rate() {
  awk '
    NF { sample[++count] = $1 }
    END {
      if (count == 0) { print "n/a"; exit }
      start = (count > 1) ? 2 : 1
      for (i = start; i <= count; i++) total += sample[i]
      print total / (count - start + 1)
    }'
}

# Every locally installed model: first column of `ollama ls`, header row dropped.
mapfile -t models < <(ollama ls | awk 'NF && $1 != "NAME" {print $1}')

# CPU/GPU description used in the report lines. Newlines are folded to spaces
# so the text stays on one line even if a probe returned several rows.
hardware="${cpu_used} and or ${gpu_avail}"
hardware=${hardware//$'\n'/ }

for model in "${models[@]}"; do
  echo -e "Total runs ${purple}${benchmark}${NC}"
  echo ""
  #echo "Current models available locally"
  #echo ""
  #ollama list
  #echo ""
  #echo "Example enter tinyllama or dolphin-phi"
  echo ""
  echo "$model"
  ollama show "$model" --system

  # Banner: shown on screen and appended to results.txt.
  echo "" | tee -a results.txt
  echo -e "Will use model: ${green}${model}${NC}" | tee -a results.txt
  echo "" | tee -a results.txt
  echo -e "Will benchmark the tokens per second for ${hardware}" | tee -a results.txt
  echo "" | tee -a results.txt
  echo "" | tee -a results.txt
  echo -e "Running benchmark ${purple}${benchmark}${NC} times for ${hardware}" | tee -a results.txt
  echo -e "with ${borange}${cpugover}${NC} setting for cpu governor" | tee -a results.txt
  echo "" | tee -a results.txt

  # Eval rates measured for this model only, one entry per run. Keeping them
  # in memory avoids re-reading the tail of results.txt, where banner lines or
  # another model's numbers could be picked up if a run printed no rate.
  rates=()
  for ((run = 1; run <= benchmark; run++)); do
    # Only stderr reaches grep (stdout, i.e. the model's answer, is thrown
    # away); grep keeps the "prompt eval rate:" and "eval rate:" lines.
    # grep exits 1 when nothing matches; that must not trip `set -e`.
    stats=$(echo "Why is the blue sky blue?" \
      | ollama run "$model" --verbose 2>&1 >/dev/null \
      | grep "eval rate:") || true

    if [[ -n $stats ]]; then
      printf '%s\n' "$stats" | tee -a results.txt
      # Third field of the non-prompt "eval rate:" line is the number.
      rate=$(awk '!/prompt eval rate:/ {print $3; exit}' <<<"$stats")
      if [[ -n $rate ]]; then
        rates+=("$rate")
      fi
    fi
  done

  avg=$(printf '%s\n' "${rates[@]}" | average_eval_rate)

  echo "" | tee -a results.txt
  echo -e "${red}${avg}${NC} is the average ${blue}tokens per second${NC} using ${green}${model}${NC} model" | tee -a results.txt
  echo "for ${hardware}" | tee -a results.txt
done
echo
echo -e "using ${borange}${cpugover}${NC} for cpu governor."
echo ""
echo "Setting cpu governor to"
# Put back the governor that was saved in cpu_def at start-up. Only tee needs
# root; it also prints the value, which completes the message above.
echo "$cpu_def" | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
#comment this out if you are repeating the same model
#this clears model from Vram
sudo systemctl restart ollama
#EOF

FROM REDDIT

Code Block

Old quant types (some base model types require these):
- Q4_0: small, very high quality loss - legacy, prefer using Q3_K_M
- Q4_1: small, substantial quality loss - legacy, prefer using Q3_K_L
- Q5_0: medium, balanced quality - legacy, prefer using Q4_K_M
- Q5_1: medium, low quality loss - legacy, prefer using Q5_K_M
New quant types (recommended):
- Q2_K: smallest, extreme quality loss - not recommended
- Q3_K: alias for Q3_K_M
- Q3_K_S: very small, very high quality loss
- Q3_K_M: very small, very high quality loss
- Q3_K_L: small, substantial quality loss
- Q4_K: alias for Q4_K_M
- Q4_K_S: small, significant quality loss
- Q4_K_M: medium, balanced quality - recommended
- Q5_K: alias for Q5_K_M
- Q5_K_S: large, low quality loss - recommended
- Q5_K_M: large, very low quality loss - recommended
- Q6_K: very large, extremely low quality loss
- Q8_0: very large, extremely low quality loss - not recommended
- F16: extremely large, virtually no quality loss - not recommended
- F32: absolutely huge, lossless - not recommended

Page tree

Versions Compared

Old Version 10

New Version 11

Key