https://github.com/intel/linux-npu-driver/releases
# Install Intel NPU driver v1.26.0 on Ubuntu 24.04.
# The commands below carry no sudo, so run them as root (or prefix each with sudo).

# Purge any previously installed NPU driver packages, even ones left in a
# "reinstall required" state.
dpkg --purge --force-remove-reinstreq intel-driver-compiler-npu intel-fw-npu intel-level-zero-npu intel-level-zero-npu-dbgsym

# Install libtbb12 before the driver packages.
apt update
apt install -y libtbb12

# Download and unpack the driver bundle.
wget https://github.com/intel/linux-npu-driver/releases/download/v1.26.0/linux-npu-driver-v1.26.0.20251125-19665715237-ubuntu2404.tar.gz
tar -xf linux-npu-driver-v1.26.0.20251125-19665715237-ubuntu2404.tar.gz

# Installs every .deb in the current directory, so run this from a directory
# that contains only the packages extracted above.
dpkg -i *.deb

# Check whether the level-zero package is already installed.
dpkg -l level-zero

# Optional (left commented out): download and install level-zero 1.24.2.
#wget https://github.com/oneapi-src/level-zero/releases/download/v1.24.2/level-zero_1.24.2+u24.04_amd64.deb
#sudo dpkg -i level-zero*.deb
Check whether the NPU is available in OpenVINO:
# Run the OpenVINO device-query sample; an "NPU :" section in its output means the NPU was detected.
python3 /usr/share/openvino/samples/python/hello_query_device/hello_query_device.py
[ INFO ] Available devices:
[ INFO ] CPU :
[ INFO ] SUPPORTED_PROPERTIES:
[ INFO ] AVAILABLE_DEVICES:
[ INFO ] RANGE_FOR_ASYNC_INFER_REQUESTS: 1, 1, 1
[ INFO ] RANGE_FOR_STREAMS: 1, 22
[ INFO ] EXECUTION_DEVICES: CPU
[ INFO ] FULL_DEVICE_NAME: Intel(R) Core(TM) Ultra 9 185H
[ INFO ] OPTIMIZATION_CAPABILITIES: FP32, INT8, BIN, EXPORT_IMPORT
[ INFO ] DEVICE_TYPE: Type.INTEGRATED
[ INFO ] DEVICE_ARCHITECTURE: intel64
[ INFO ] NUM_STREAMS: 1
[ INFO ] INFERENCE_NUM_THREADS: 0
[ INFO ] PERF_COUNT: False
[ INFO ] INFERENCE_PRECISION_HINT: <Type: 'float32'>
[ INFO ] PERFORMANCE_HINT: PerformanceMode.LATENCY
[ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0
[ INFO ] ENABLE_CPU_PINNING: True
[ INFO ] ENABLE_CPU_RESERVATION: False
[ INFO ] SCHEDULING_CORE_TYPE: SchedulingCoreType.ANY_CORE
[ INFO ] MODEL_DISTRIBUTION_POLICY: set()
[ INFO ] ENABLE_HYPER_THREADING: True
[ INFO ] DEVICE_ID:
[ INFO ] CPU_DENORMALS_OPTIMIZATION: False
[ INFO ] LOG_LEVEL: Level.NO
[ INFO ] CPU_SPARSE_WEIGHTS_DECOMPRESSION_RATE: 1.0
[ INFO ] DYNAMIC_QUANTIZATION_GROUP_SIZE: 32
[ INFO ] KV_CACHE_PRECISION: <Type: 'uint8_t'>
[ INFO ] KEY_CACHE_PRECISION: <Type: 'uint8_t'>
[ INFO ] VALUE_CACHE_PRECISION: <Type: 'uint8_t'>
[ INFO ] KEY_CACHE_GROUP_SIZE: 0
[ INFO ] VALUE_CACHE_GROUP_SIZE: 0
[ INFO ]
[ INFO ] GPU :
[ INFO ] SUPPORTED_PROPERTIES:
[ INFO ] AVAILABLE_DEVICES: 0
[ INFO ] RANGE_FOR_ASYNC_INFER_REQUESTS: 1, 2, 1
[ INFO ] RANGE_FOR_STREAMS: 1, 2
[ INFO ] OPTIMAL_BATCH_SIZE: 1
[ INFO ] MAX_BATCH_SIZE: 1
[ INFO ] DEVICE_ARCHITECTURE: GPU: vendor=0x8086 arch=v12.70.0
[ INFO ] FULL_DEVICE_NAME: Intel(R) Graphics [0x7d55] (iGPU)
[ INFO ] DEVICE_UUID: 8680557d080000000002000000000000
[ INFO ] DEVICE_LUID: 409a0000499a0000
[ INFO ] DEVICE_TYPE: Type.INTEGRATED
[ INFO ] DEVICE_GOPS: {<Type: 'float16'>: 9625.599609375, <Type: 'float32'>: 4812.7998046875, <Type: 'int8_t'>: 19251.19921875, <Type: 'uint8_t'>: 19251.19921875}
[ INFO ] OPTIMIZATION_CAPABILITIES: FP32, BIN, FP16, INT8, EXPORT_IMPORT
[ INFO ] GPU_DEVICE_TOTAL_MEM_SIZE: 126025547776
[ INFO ] GPU_UARCH_VERSION: 12.70.0
[ INFO ] GPU_EXECUTION_UNITS_COUNT: 128
[ INFO ] GPU_MEMORY_STATISTICS: {}
[ INFO ] PERF_COUNT: False
[ INFO ] MODEL_PRIORITY: Priority.MEDIUM
[ INFO ] GPU_HOST_TASK_PRIORITY: Priority.MEDIUM
[ INFO ] GPU_QUEUE_PRIORITY: Priority.MEDIUM
[ INFO ] GPU_QUEUE_THROTTLE: Priority.MEDIUM
[ INFO ] GPU_ENABLE_SDPA_OPTIMIZATION: True
[ INFO ] GPU_ENABLE_LOOP_UNROLLING: True
[ INFO ] GPU_DISABLE_WINOGRAD_CONVOLUTION: False
[ INFO ] CACHE_DIR:
[ INFO ] CACHE_MODE: CacheMode.OPTIMIZE_SPEED
[ INFO ] PERFORMANCE_HINT: PerformanceMode.LATENCY
[ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE
[ INFO ] COMPILATION_NUM_THREADS: 22
[ INFO ] NUM_STREAMS: 1
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 0
[ INFO ] INFERENCE_PRECISION_HINT: <Type: 'float16'>
[ INFO ] ENABLE_CPU_PINNING: False
[ INFO ] ENABLE_CPU_RESERVATION: False
[ INFO ] DEVICE_ID: 0
[ INFO ] DYNAMIC_QUANTIZATION_GROUP_SIZE: 0
[ INFO ] ACTIVATIONS_SCALE_FACTOR: -1.0
[ INFO ] WEIGHTS_PATH:
[ INFO ] CACHE_ENCRYPTION_CALLBACKS: UNSUPPORTED TYPE
[ INFO ] KV_CACHE_PRECISION: <Type: 'dynamic'>
[ INFO ] MODEL_PTR: UNSUPPORTED TYPE
[ INFO ]
[ INFO ] NPU :
[ INFO ] SUPPORTED_PROPERTIES:
[ INFO ] AVAILABLE_DEVICES: 3720
[ INFO ] CACHE_DIR:
[ INFO ] COMPILATION_NUM_THREADS: 22
[ INFO ] DEVICE_ARCHITECTURE: 3720
[ INFO ] DEVICE_GOPS: {<Type: 'bfloat16'>: 0.0, <Type: 'float16'>: 4300.7998046875, <Type: 'float32'>: 0.0, <Type: 'int8_t'>: 8601.599609375, <Type: 'uint8_t'>: 8601.599609375}
[ INFO ] DEVICE_ID:
[ INFO ] DEVICE_PCI_INFO: {domain: 0 bus: 0 device: 0xb function: 0}
[ INFO ] DEVICE_TYPE: Type.INTEGRATED
[ INFO ] DEVICE_UUID: 80d1d11eb73811eab3de0242ac130004
[ INFO ] ENABLE_CPU_PINNING: False
[ INFO ] EXECUTION_DEVICES: NPU
[ INFO ] EXECUTION_MODE_HINT: ExecutionMode.PERFORMANCE
[ INFO ] FULL_DEVICE_NAME: Intel(R) AI Boost
[ INFO ] INFERENCE_PRECISION_HINT: <Type: 'float16'>
[ INFO ] LOG_LEVEL: Level.ERR
[ INFO ] MODEL_PRIORITY: Priority.MEDIUM
[ INFO ] NPU_BYPASS_UMD_CACHING: False
[ INFO ] NPU_COMPILATION_MODE_PARAMS:
[ INFO ] NPU_COMPILER_DYNAMIC_QUANTIZATION: False
[ INFO ] NPU_COMPILER_VERSION: 458772
[ INFO ] NPU_DEFER_WEIGHTS_LOAD: False
[ INFO ] NPU_DEVICE_ALLOC_MEM_SIZE: 0
[ INFO ] NPU_DEVICE_TOTAL_MEM_SIZE: 134571712512
[ INFO ] NPU_DRIVER_VERSION: 1746727061
[ INFO ] NPU_MAX_TILES: 2
[ INFO ] NPU_QDQ_OPTIMIZATION: False
[ INFO ] NPU_TILES: -1
[ INFO ] NPU_TURBO: False
[ INFO ] NUM_STREAMS: 1
[ INFO ] OPTIMAL_NUMBER_OF_INFER_REQUESTS: 1
[ INFO ] OPTIMIZATION_CAPABILITIES: FP16, INT8, EXPORT_IMPORT
[ INFO ] PERFORMANCE_HINT: PerformanceMode.LATENCY
[ INFO ] PERFORMANCE_HINT_NUM_REQUESTS: 1
[ INFO ] PERF_COUNT: False
[ INFO ] RANGE_FOR_ASYNC_INFER_REQUESTS: 1, 10, 1
[ INFO ] RANGE_FOR_STREAMS: 1, 4
[ INFO ] WEIGHTS_PATH:
[ INFO ] WORKLOAD_TYPE: WorkloadType.DEFAULT
[ INFO ]