Intel NPU driver releases (pick the package matching your Ubuntu version): https://github.com/intel/linux-npu-driver/releases

dpkg --purge --force-remove-reinstreq intel-driver-compiler-npu intel-fw-npu intel-level-zero-npu intel-level-zero-npu-dbgsym
apt update
apt install -y libtbb12
wget https://github.com/intel/linux-npu-driver/releases/download/v1.26.0/linux-npu-driver-v1.26.0.20251125-19665715237-ubuntu2404.tar.gz
tar -xf linux-npu-driver-v1.26.0.20251125-19665715237-ubuntu2404.tar.gz
dpkg -i *.deb
dpkg -l level-zero
# Only needed if the dpkg -l check above shows that level-zero is not installed:
#wget https://github.com/oneapi-src/level-zero/releases/download/v1.24.2/level-zero_1.24.2+u24.04_amd64.deb
#sudo dpkg -i level-zero*.deb


Check whether the NPU is available in OpenVINO (the output should contain an "NPU" device section):

python3 /usr/share/openvino/samples/python/hello_query_device/hello_query_device.py
[ INFO ] Available devices:
[ INFO ] CPU :
[ INFO ]        SUPPORTED_PROPERTIES:
[ INFO ]                AVAILABLE_DEVICES:
[ INFO ]                RANGE_FOR_ASYNC_INFER_REQUESTS: 1, 1, 1
[ INFO ]                RANGE_FOR_STREAMS: 1, 22
[ INFO ]                EXECUTION_DEVICES: CPU
[ INFO ]                FULL_DEVICE_NAME: Intel(R) Core(TM) Ultra 9 185H
[ INFO ]                OPTIMIZATION_CAPABILITIES: FP32, INT8, BIN, EXPORT_IMPORT
[ INFO ]                DEVICE_TYPE: Type.INTEGRATED
[ INFO ]                DEVICE_ARCHITECTURE: intel64
[ INFO ]                NUM_STREAMS: 1
[ INFO ]                INFERENCE_NUM_THREADS: 0
[ INFO ]                PERF_COUNT: False
[ INFO ]                INFERENCE_PRECISION_HINT: <Type: 'float32'>
[ INFO ]                PERFORMANCE_HINT: PerformanceMode.LATENCY
[ INFO ]                EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE
[ INFO ]                PERFORMANCE_HINT_NUM_REQUESTS: 0
[ INFO ]                ENABLE_CPU_PINNING: True
[ INFO ]                ENABLE_CPU_RESERVATION: False
[ INFO ]                SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE
[ INFO ]                MODEL_DISTRIBUTION_POLICY: set()
[ INFO ]                ENABLE_HYPER_THREADING: True
[ INFO ]                DEVICE_ID:
[ INFO ]                CPU_DENORMALS_OPTIMIZATION: False
[ INFO ]                LOG_LEVEL: Level.NO
[ INFO ]                CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0
[ INFO ]                DYNAMIC_QUANTIZATION_GROUP_SIZE: 32
[ INFO ]                KV_CACHE_PRECISION: <Type: 'uint8_t'>
[ INFO ]                KEY_CACHE_PRECISION: <Type: 'uint8_t'>
[ INFO ]                VALUE_CACHE_PRECISION: <Type: 'uint8_t'>
[ INFO ]                KEY_CACHE_GROUP_SIZE: 0
[ INFO ]                VALUE_CACHE_GROUP_SIZE: 0
[ INFO ]
[ INFO ] GPU :
[ INFO ]        SUPPORTED_PROPERTIES:
[ INFO ]                AVAILABLE_DEVICES: 0
[ INFO ]                RANGE_FOR_ASYNC_INFER_REQUESTS: 1, 2, 1
[ INFO ]                RANGE_FOR_STREAMS: 1, 2
[ INFO ]                OPTIMAL_BATCH_SIZE: 1
[ INFO ]                MAX_BATCH_SIZE: 1
[ INFO ]                DEVICE_ARCHITECTURE: GPU: vendor=0x8086 arch=v12.70.0
[ INFO ]                FULL_DEVICE_NAME: Intel(R) Graphics [0x7d55] (iGPU)
[ INFO ]                DEVICE_UUID: 8680557d080000000002000000000000
[ INFO ]                DEVICE_LUID: 409a0000499a0000
[ INFO ]                DEVICE_TYPE: Type.INTEGRATED
[ INFO ]                DEVICE_GOPS: {<Type: 'float16'>: 9625.599609375, <Type: 'float32'>: 4812.7998046875, <Type: 'int8_t'>: 19251.19921875, <Type: 'uint8_t'>: 19251.19921875}
[ INFO ]                OPTIMIZATION_CAPABILITIES: FP32, BIN, FP16, INT8, EXPORT_IMPORT
[ INFO ]                GPU_DEVICE_TOTAL_MEM_SIZE: 126025547776
[ INFO ]                GPU_UARCH_VERSION: 12.70.0
[ INFO ]                GPU_EXECUTION_UNITS_COUNT: 128
[ INFO ]                GPU_MEMORY_STATISTICS: {}
[ INFO ]                PERF_COUNT: False
[ INFO ]                MODEL_PRIORITY: Priority.MEDIUM
[ INFO ]                GPU_HOST_TASK_PRIORITY: Priority.MEDIUM
[ INFO ]                GPU_QUEUE_PRIORITY: Priority.MEDIUM
[ INFO ]                GPU_QUEUE_THROTTLE: Priority.MEDIUM
[ INFO ]                GPU_ENABLE_SDPA_OPTIMIZATION: True
[ INFO ]                GPU_ENABLE_LOOP_UNROLLING: True
[ INFO ]                GPU_DISABLE_WINOGRAD_CONVOLUTION: False
[ INFO ]                CACHE_DIR:
[ INFO ]                CACHE_MODE: CacheMode.OPTIMIZE_SPEED
[ INFO ]                PERFORMANCE_HINT: PerformanceMode.LATENCY
[ INFO ]                EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE
[ INFO ]                COMPILATION_NUM_THREADS: 22
[ INFO ]                NUM_STREAMS: 1
[ INFO ]                PERFORMANCE_HINT_NUM_REQUESTS: 0
[ INFO ]                INFERENCE_PRECISION_HINT: <Type: 'float16'>
[ INFO ]                ENABLE_CPU_PINNING: False
[ INFO ]                ENABLE_CPU_RESERVATION: False
[ INFO ]                DEVICE_ID: 0
[ INFO ]                DYNAMIC_QUANTIZATION_GROUP_SIZE: 0
[ INFO ]                ACTIVATIONS_SCALE_FACTOR: -1.0
[ INFO ]                WEIGHTS_PATH:
[ INFO ]                CACHE_ENCRYPTION_CALLBACKS: UNSUPPORTED TYPE
[ INFO ]                KV_CACHE_PRECISION: <Type: 'dynamic'>
[ INFO ]                MODEL_PTR: UNSUPPORTED TYPE
[ INFO ]
[ INFO ] NPU :
[ INFO ]        SUPPORTED_PROPERTIES:
[ INFO ]                AVAILABLE_DEVICES: 3720
[ INFO ]                CACHE_DIR:
[ INFO ]                COMPILATION_NUM_THREADS: 22
[ INFO ]                DEVICE_ARCHITECTURE: 3720
[ INFO ]                DEVICE_GOPS: {<Type: 'bfloat16'>: 0.0, <Type: 'float16'>: 4300.7998046875, <Type: 'float32'>: 0.0, <Type: 'int8_t'>: 8601.599609375, <Type: 'uint8_t'>: 8601.599609375}
[ INFO ]                DEVICE_ID:
[ INFO ]                DEVICE_PCI_INFO: {domain: 0 bus: 0 device: 0xb function: 0}
[ INFO ]                DEVICE_TYPE: Type.INTEGRATED
[ INFO ]                DEVICE_UUID: 80d1d11eb73811eab3de0242ac130004
[ INFO ]                ENABLE_CPU_PINNING: False
[ INFO ]                EXECUTION_DEVICES: NPU
[ INFO ]                EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE
[ INFO ]                FULL_DEVICE_NAME: Intel(R) AI Boost
[ INFO ]                INFERENCE_PRECISION_HINT: <Type: 'float16'>
[ INFO ]                LOG_LEVEL: Level.ERR
[ INFO ]                MODEL_PRIORITY: Priority.MEDIUM
[ INFO ]                NPU_BYPASS_UMD_CACHING: False
[ INFO ]                NPU_COMPILATION_MODE_PARAMS:
[ INFO ]                NPU_COMPILER_DYNAMIC_QUANTIZATION: False
[ INFO ]                NPU_COMPILER_VERSION: 458772
[ INFO ]                NPU_DEFER_WEIGHTS_LOAD: False
[ INFO ]                NPU_DEVICE_ALLOC_MEM_SIZE: 0
[ INFO ]                NPU_DEVICE_TOTAL_MEM_SIZE: 134571712512
[ INFO ]                NPU_DRIVER_VERSION: 1746727061
[ INFO ]                NPU_MAX_TILES: 2
[ INFO ]                NPU_QDQ_OPTIMIZATION: False
[ INFO ]                NPU_TILES: -1
[ INFO ]                NPU_TURBO: False
[ INFO ]                NUM_STREAMS: 1
[ INFO ]                OPTIMAL_NUMBER_OF_INFER_REQUESTS: 1
[ INFO ]                OPTIMIZATION_CAPABILITIES: FP16, INT8, EXPORT_IMPORT
[ INFO ]                PERFORMANCE_HINT: PerformanceMode.LATENCY
[ INFO ]                PERFORMANCE_HINT_NUM_REQUESTS: 1
[ INFO ]                PERF_COUNT: False
[ INFO ]                RANGE_FOR_ASYNC_INFER_REQUESTS: 1, 10, 1
[ INFO ]                RANGE_FOR_STREAMS: 1, 4
[ INFO ]                WEIGHTS_PATH:
[ INFO ]                WORKLOAD_TYPE: WorkloadType.DEFAULT
[ INFO ]


  • No labels