|
190 | 190 |   "print(f\"Selected model {model_id.value}\")"
191 | 191 |   ]
192 | 192 |   },
    | 193 | + {
    | 194 | + "attachments": {},
    | 195 | + "cell_type": "markdown",
    | 196 | + "id": "ec2cdb27",
    | 197 | + "metadata": {},
    | 198 | + "source": [
    | 199 | + "## Select device for inference and model variant\n",
    | 200 | + "[back to top ⬆️](#Table-of-contents:)"
    | 201 | + ]
    | 202 | + },
    | 203 | + {
    | 204 | + "cell_type": "code",
    | 205 | + "execution_count": null,
    | 206 | + "id": "c0f95e9b",
    | 207 | + "metadata": {},
    | 208 | + "outputs": [],
    | 209 | + "source": [
    | 210 | + "device = device_widget(\"CPU\")\n",
    | 211 | + "\n",
    | 212 | + "device"
    | 213 | + ]
    | 214 | + },
193 | 215 |   {
194 | 216 |   "attachments": {},
195 | 217 |   "cell_type": "markdown",
|
291 | 313 |   "prepare_int8_model = widgets.Checkbox(\n",
292 | 314 |   " value=False,\n",
293 | 315 |   " description=\"Prepare INT8 model\",\n",
294 |     | - " disabled=False,\n",
    | 316 | + " disabled=device.value == \"NPU\",\n",
295 | 317 |   ")\n",
296 | 318 |   "prepare_fp16_model = widgets.Checkbox(\n",
297 | 319 |   " value=False,\n",
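Note that `disabled=device.value == "NPU"` is evaluated once, when this cell runs, so the checkbox only reflects the device chosen at that moment; picking a different device afterwards requires re-running the cell. A reactive variant (a sketch, not what this PR does) could observe the dropdown instead:

```python
import ipywidgets as widgets

prepare_int8_model = widgets.Checkbox(value=False, description="Prepare INT8 model")

def _toggle_int8(change):
    # INT8 weight compression is not offered on NPU, so grey the box out.
    prepare_int8_model.disabled = change["new"] == "NPU"

device.observe(_toggle_int8, names="value")  # `device` is the dropdown added above
```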
|
|
330 | 352 |   "fp16_model_dir = Path(model_id.value) / \"FP16\"\n",
331 | 353 |   "int8_model_dir = Path(model_id.value) / \"INT8_compressed_weights\"\n",
332 | 354 |   "int4_model_dir = Path(model_id.value) / \"INT4_compressed_weights\"\n",
    | 355 | + "int4_npu_friendly = Path(model_id.value) / \"INT4_NPU_compressed_weights\"\n",
333 | 356 |   "\n",
334 | 357 |   "core = ov.Core()\n",
335 | 358 |   "\n",
|
|
376 | 399 |   " },\n",
377 | 400 |   " }\n",
378 | 401 |   "\n",
    | 402 | + " int4_result_model_dir = int4_model_dir if device.value != \"NPU\" else int4_npu_friendly\n",
    | 403 | + "\n",
379 | 404 |   " model_compression_params = compression_configs.get(model_id.value, compression_configs[\"default\"])\n",
380 |     | - " if (int4_model_dir / \"openvino_model.xml\").exists():\n",
    | 405 | + " if device.value == \"NPU\":\n",
    | 406 | + " model_compression_params[\"group_size\"] = -1\n",
    | 407 | + " model_compression_params[\"sym\"] = True\n",
    | 408 | + " model_compression_params[\"ratio\"] = 1.0\n",
    | 409 | + " \n",
    | 410 | + " if (int4_result_model_dir / \"openvino_model.xml\").exists():\n",
381 | 411 |   " return\n",
382 | 412 |   "export_command_base = \"optimum-cli export openvino --model {} --task text-generation-with-past --weight-format int4\".format(pt_model_id)\n",
383 |     | - " int4_compression_args = \" --group-size {} --ratio {}\".format(model_compression_params[\"group_size\"], model_compression_params[\"ratio\"])\n",
    | 413 | + " int4_compression_args = \" --group-size {} --ratio {}\".format(model_compression_params[\"group_size\"], model_compression_params[\"ratio\"]) \n",
384 | 414 |   " if model_compression_params[\"sym\"]:\n",
385 | 415 |   " int4_compression_args += \" --sym\"\n",
386 | 416 |   " export_command_base += int4_compression_args\n",
387 |     | - " export_command = export_command_base + \" \" + str(int4_model_dir)\n",
    | 417 | + " export_command = export_command_base + \" \" + str(int4_result_model_dir)\n",
388 | 418 |   "display(Markdown(\"**Export command:**\"))\n",
389 | 419 |   "display(Markdown(f\"`{export_command}`\"))\n",
390 | 420 |   " ! $export_command\n",
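The three overrides are the NPU-friendly core of the change: `group_size = -1` switches from grouped to channel-wise quantization scales, `sym = True` forces symmetric quantization, and `ratio = 1.0` applies INT4 to all eligible layers instead of a mixed INT4/INT8 split. The cell drives this through `optimum-cli`, but the same configuration expressed directly with NNCF would look roughly like this (illustration only; `model` is assumed to be the exported FP16 `ov.Model`):

```python
import nncf

# Channel-wise symmetric INT4 over all eligible layers, mirroring the
# --sym --group-size -1 --ratio 1.0 flags the export command builds above.
compressed_model = nncf.compress_weights(
    model,
    mode=nncf.CompressWeightsMode.INT4_SYM,
    group_size=-1,
    ratio=1.0,
)
```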
|
|
428 | 458 |   "source": [
429 | 459 |   "fp16_weights = fp16_model_dir / \"openvino_model.bin\"\n",
430 | 460 |   "int8_weights = int8_model_dir / \"openvino_model.bin\"\n",
431 |     | - "int4_weights = int4_model_dir / \"openvino_model.bin\"\n",
    | 461 | + "int4_weights = (int4_model_dir if not device.value == \"NPU\" else int4_npu_friendly) / \"openvino_model.bin\"\n",
432 | 462 |   "\n",
433 | 463 |   "if fp16_weights.exists():\n",
434 | 464 |   " print(f\"Size of FP16 model is {fp16_weights.stat().st_size / 1024 / 1024:.2f} MB\")\n",
|
|
439 | 469 |   " print(f\"Compression rate for INT{precision} model: {fp16_weights.stat().st_size / compressed_weights.stat().st_size:.3f}\")"
440 | 470 |   ]
441 | 471 |   },
442 |     | - {
443 |     | - "attachments": {},
444 |     | - "cell_type": "markdown",
445 |     | - "id": "3df73379-bccc-41b1-9c94-c3040819805b",
446 |     | - "metadata": {},
447 |     | - "source": [
448 |     | - "## Select device for inference and model variant\n",
449 |     | - "[back to top ⬆️](#Table-of-contents:)\n",
450 |     | - "\n",
451 |     | - ">**Note**: There may be no speedup for INT4/INT8 compressed models on dGPU."
452 |     | - ]
453 |     | - },
454 |     | - {
455 |     | - "cell_type": "code",
456 |     | - "execution_count": 8,
457 |     | - "id": "d2d7bf5b-8a05-4c3b-a36b-631af5c197e9",
458 |     | - "metadata": {},
459 |     | - "outputs": [
460 |     | - {
461 |     | - "data": {
462 |     | - "application/vnd.jupyter.widget-view+json": {
463 |     | - "model_id": "76c6c52c8af04d9084b1e3b56686f563",
464 |     | - "version_major": 2,
465 |     | - "version_minor": 0
466 |     | - },
467 |     | - "text/plain": [
468 |     | - "Dropdown(description='Device:', options=('CPU', 'AUTO'), value='CPU')"
469 |     | - ]
470 |     | - },
471 |     | - "execution_count": 8,
472 |     | - "metadata": {},
473 |     | - "output_type": "execute_result"
474 |     | - }
475 |     | - ],
476 |     | - "source": [
477 |     | - "core = ov.Core()\n",
478 |     | - "\n",
479 |     | - "device = device_widget(\"CPU\", exclude=[\"NPU\"])\n",
480 |     | - "\n",
481 |     | - "device"
482 |     | - ]
483 |     | - },
484 | 472 |   {
485 | 473 |   "cell_type": "code",
486 | 474 |   "execution_count": 9,
|
|
505 | 493 |   ],
506 | 494 |   "source": [
507 | 495 |   "available_models = []\n",
508 |     | - "if int4_model_dir.exists():\n",
    | 496 | + "if int4_model_dir.exists() and device.value != \"NPU\":\n",
509 | 497 |   " available_models.append(\"INT4\")\n",
510 |     | - "if int8_model_dir.exists():\n",
    | 498 | + "if int4_npu_friendly.exists() and device.value == \"NPU\":\n",
    | 499 | + " available_models.append(\"INT4\")\n",
    | 500 | + "if int8_model_dir.exists() and device.value != \"NPU\":\n",
511 | 501 |   " available_models.append(\"INT8\")\n",
512 | 502 |   "if fp16_model_dir.exists():\n",
513 | 503 |   " available_models.append(\"FP16\")\n",
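The four `if` statements encode a single rule: on NPU, only the channel-wise INT4 folder and FP16 are offered, while other devices also get grouped INT4 and INT8. An equivalent, slightly flatter formulation (a readability sketch, not the PR's code):

```python
# Same availability rule, factored through one device check.
is_npu = device.value == "NPU"
int4_dir = int4_npu_friendly if is_npu else int4_model_dir

available_models = []
if int4_dir.exists():
    available_models.append("INT4")
if int8_model_dir.exists() and not is_npu:
    available_models.append("INT8")
if fp16_model_dir.exists():
    available_models.append("FP16")
```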
|
|
541 | 531 |   "from openvino_tokenizers import convert_tokenizer\n",
542 | 532 |   "\n",
543 | 533 |   "if model_to_run.value == \"INT4\":\n",
544 |     | - " model_dir = int4_model_dir\n",
    | 534 | + " model_dir = int4_model_dir if device.value != \"NPU\" else int4_npu_friendly\n",
545 | 535 |   "elif model_to_run.value == \"INT8\":\n",
546 | 536 |   " model_dir = int8_model_dir\n",
547 | 537 |   "else:\n",
|
|
880 | 870 |   "demo = make_demo(run_fn=run_generation, title=f\"Question Answering with {model_id.value} and OpenVINO\")\n",
881 | 871 |   "\n",
882 | 872 |   "try:\n",
883 |     | - " demo.queue().launch(height=800)\n",
    | 873 | + " demo.queue().launch(height=800, debug=True)\n",
884 | 874 |   "except Exception:\n",
885 |     | - " demo.queue().launch(share=True, height=800)\n",
    | 875 | + " demo.queue().launch(share=True, height=800, debug=True)\n",
886 | 876 |   "# If you are launching remotely, specify server_name and server_port\n",
887 | 877 |   "# EXAMPLE: `demo.launch(server_name='your server name', server_port='server port in int')`\n",
888 | 878 |   "# To learn more please refer to the Gradio docs: https://gradio.app/docs/"
889 | 879 |   ]
890 |     | - },
891 |     | - {
892 |     | - "cell_type": "code",
893 |     | - "execution_count": null,
894 |     | - "id": "59038a29",
895 |     | - "metadata": {},
896 |     | - "outputs": [],
897 |     | - "source": [
898 |     | - "# please uncomment and run this cell for stopping gradio interface\n",
899 |     | - "# demo.close()"
900 |     | - ]
901 | 880 |   }
902 | 881 |   ],
903 | 882 |   "metadata": {
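On the `debug=True` additions in this hunk: in Gradio, `debug=True` keeps `launch()` blocking and prints server-side errors into the cell output, which plausibly also explains why the separate `# demo.close()` cell is removed here: interrupting the kernel now shuts the demo down. For the remote-launch comment, a concrete form (host and port are placeholders; `server_port` takes an `int`) would be:

```python
# Placeholder values -- substitute your own host and port.
demo.queue().launch(server_name="0.0.0.0", server_port=7860, debug=True)
```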
|
|