Commit e3bb88c

update ov in docker (#2725)

1 parent 7d2a168 commit e3bb88c

5 files changed (+28 -22)

.docker/Pipfile

+1 -1

@@ -26,7 +26,7 @@ notebook = "<7.0.0"
 numpy = ">=1.21.0"
 onnx = ">=1.11.0"
 opencv-python = "*"
-openvino = {version = "==2024.5.0"}
+openvino = {version = "==2025.0.0"}
 openvino-telemetry = "==2023.2.1"
 ovmsclient = "*"
 Pillow = ">=8.3.2"
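After rebuilding the Docker environment with the updated Pipfile, the new pin can be sanity-checked from Python. A minimal sketch, assuming the environment was reinstalled (e.g. via pipenv) and that the openvino package exposes get_version(), as recent releases do:

# Minimal sketch: confirm the OpenVINO version pinned above is the one installed.
import openvino as ov

build = ov.get_version()  # full build string, e.g. "2025.0.0-..."
print(build)
assert build.startswith("2025.0"), "expected the pinned 2025.0.0 release"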

.docker/Pipfile.lock

+3 -3

Some generated files are not rendered by default.

supplementary_materials/notebooks/fastdraft-deepseek/fastdraft_deepseek.ipynb

+5 -4

@@ -103,7 +103,7 @@
     " ! optimum-cli export openvino --model $model_id --task text-generation-with-past --weight-format int4 $model_dir\n",
     "\n",
     "# convert OV tokenizer if needed\n",
-    "if not (model_dir / 'openvino_tokenizer.xml').exists():\n",
+    "if not (model_dir / \"openvino_tokenizer.xml\").exists():\n",
     "    ! convert_tokenizer $model_dir --with-detokenizer -o $model_dir"
    ]
   },
@@ -135,6 +135,7 @@
     "    print(subword, end=\"\", flush=True)\n",
     "    return False\n",
     "\n",
+    "\n",
     "# Define scheduler\n",
     "scheduler_config = ov_genai.SchedulerConfig()\n",
     "scheduler_config.num_kv_blocks = 2048 // 16\n",
@@ -221,15 +222,15 @@
     "import huggingface_hub as hf_hub\n",
     "\n",
     "draft_model_id = \"OpenVINO/Llama-3.1-8B-Instruct-FastDraft-150M-int8-ov\"\n",
-    "draft_model_path = Path('DeepSeek-R1-Llama-FastDraft-int8-ov')\n",
+    "draft_model_path = Path(\"DeepSeek-R1-Llama-FastDraft-int8-ov\")\n",
     "\n",
     "if not draft_model_path.exists():\n",
     "    hf_hub.snapshot_download(draft_model_id, local_dir=draft_model_path)\n",
     "\n",
     "# We need tokenizers to match between the target and draft model so we apply this workaround\n",
     "if not filecmp.cmp(str(model_dir / \"openvino_tokenizer.xml\"), str(draft_model_path / \"openvino_tokenizer.xml\"), shallow=False):\n",
-    "    for fname in ['openvino_tokenizer.xml', 'openvino_tokenizer.bin', 'openvino_detokenizer.xml', 'openvino_detokenizer.bin']:\n",
-    "        shutil.copy(model_dir / fname, draft_model_path / fname)\n"
+    "    for fname in [\"openvino_tokenizer.xml\", \"openvino_tokenizer.bin\", \"openvino_detokenizer.xml\", \"openvino_detokenizer.bin\"]:\n",
+    "        shutil.copy(model_dir / fname, draft_model_path / fname)"
    ]
   },
   {
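For context, the FastDraft checkpoint downloaded above serves as the draft model for speculative decoding. A minimal sketch of how it plugs into an OpenVINO GenAI pipeline, mirroring the scheduler settings from the notebook; the target model path shown here is hypothetical (the notebook defines model_dir in an earlier cell):

# Minimal sketch: speculative decoding with the FastDraft draft model.
import openvino_genai as ov_genai

model_dir = "DeepSeek-R1-Distill-Llama-8B-int4-ov"  # hypothetical target model path

# Scheduler settings mirror the notebook cell above.
scheduler_config = ov_genai.SchedulerConfig()
scheduler_config.num_kv_blocks = 2048 // 16

draft = ov_genai.draft_model("DeepSeek-R1-Llama-FastDraft-int8-ov", "CPU")
pipe = ov_genai.LLMPipeline(model_dir, "CPU", scheduler_config=scheduler_config, draft_model=draft)

config = ov_genai.GenerationConfig()
config.max_new_tokens = 256
config.num_assistant_tokens = 5  # tokens the draft model proposes per step
print(pipe.generate("Which is bigger, 9.9 or 9.11?", config))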

supplementary_materials/notebooks/fastdraft-deepseek/gradio_helper.py

+11 -5

@@ -13,7 +13,7 @@
 english_examples = [
     ["Which is bigger, 9.9 or 9.11?"],
     ["Classify the following numbers as 'prime' or 'composite' - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16.?"],
-    ["What are the classifications of Academic Degrees?"],
+    ["What are the classifications of Academic Degrees?"],
     ["Which word does not belong to the other: Hammer, Screwdriver, Nail, Wood"],
     ["Identify which instrument is string or percussion: Kpanlogo, Shamisen"],
     ["Which of the following are colors: red, black, yellow, orange, sun, sunflower, chips, book, white, pink, blue, keyboard."],
@@ -185,10 +185,16 @@ def apply_format(partial_text: str):
         list-style-position: outside;
         margin: 0.5em 15px;
         padding: 0px 0px 10px 15px;"""
-    formatted_text = ''
-    splits = partial_text.split('</think>')
+    formatted_text = ""
+    splits = partial_text.split("</think>")
     for i, s in enumerate(splits):
-        formatted_text += s.replace('<think>', f'<details {"open" if i == (len(splits) - 1) else ""} style="margin:0px;padding:0px;"><summary style="{summary_style}">Thought</summary><blockquote style="{blockquote_style}"><p>') + '</p></blockquote></details>'
+        formatted_text += (
+            s.replace(
+                "<think>",
+                f'<details {"open" if i == (len(splits) - 1) else ""} style="margin:0px;padding:0px;"><summary style="{summary_style}">Thought</summary><blockquote style="{blockquote_style}"><p>',
+            )
+            + "</p></blockquote></details>"
+        )
     return formatted_text
 
 def is_partial_stop(output, stop_str):
@@ -379,4 +385,4 @@ def clear_history():
     )
     clear.click(fn=clear_history, outputs=[chatbot], queue=False)
 
-    return demo
+    return demo
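apply_format turns the model's <think>…</think> reasoning spans into collapsible HTML blocks, keeping only a still-streaming (unclosed) span expanded. A minimal usage sketch, assuming apply_format is importable from gradio_helper:

# Minimal usage sketch for apply_format (import path assumed).
from gradio_helper import apply_format

streamed = "<think>Compare the decimals digit by digit...</think>9.9 is bigger.<think>Double-check: 9.90 vs 9.11"
html = apply_format(streamed)
# The finished thought collapses into <details><summary>Thought</summary>...</details>;
# the trailing, still-open <think> span carries the "open" attribute and renders expanded.
print(html)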

supplementary_materials/notebooks/fastdraft-deepseek/llm_pipeline_with_hf_tokenizer.py

+8 -9

@@ -6,26 +6,25 @@
 import openvino_genai as ov_genai
 
 
-
-DecodedResults = namedtuple('DecodedResults', ['perf_metrics', 'scores', 'texts'])
+DecodedResults = namedtuple("DecodedResults", ["perf_metrics", "scores", "texts"])
 
 
 class LLMPipelineWithHFTokenizer(ov_genai.LLMPipeline):
-
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        model_dir = kwargs['model_dir'] if 'model_dir' in kwargs else args[0]
+        model_dir = kwargs["model_dir"] if "model_dir" in kwargs else args[0]
         self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
 
     def generate(self, *args, **kwargs):
-        texts = kwargs.pop('inputs', None)
+        texts = kwargs.pop("inputs", None)
         if texts is None:
             texts, args = args[0], args[1:]
-        if kwargs.pop('apply_chat_template', False):
-            inputs = self.tokenizer.apply_chat_template(texts, add_generation_prompt=True, return_tensors='np')
+        if kwargs.pop("apply_chat_template", False):
+            inputs = self.tokenizer.apply_chat_template(texts, add_generation_prompt=True, return_tensors="np")
             inputs = ov.Tensor(inputs)
         else:
-            inputs = ov.Tensor(self.tokenizer(texts, return_tensors='np')['input_ids'])
+            inputs = ov.Tensor(self.tokenizer(texts, return_tensors="np")["input_ids"])
         out = super().generate(inputs, *args, **kwargs)
         res = DecodedResults(out.perf_metrics, out.scores, self.tokenizer.batch_decode(out.tokens))
-        return res
+        return res
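The wrapper routes tokenization through a Hugging Face tokenizer instead of the pipeline's built-in one, which is what lets the notebook apply the model's chat template before generation. A minimal usage sketch; the model directory is hypothetical:

# Minimal usage sketch for LLMPipelineWithHFTokenizer.
from llm_pipeline_with_hf_tokenizer import LLMPipelineWithHFTokenizer

pipe = LLMPipelineWithHFTokenizer("DeepSeek-R1-Distill-Llama-8B-int4-ov", "CPU")  # hypothetical path
messages = [{"role": "user", "content": "Which is bigger, 9.9 or 9.11?"}]
# apply_chat_template=True formats `messages` with the HF chat template before tokenizing.
res = pipe.generate(messages, apply_chat_template=True, max_new_tokens=128)
print(res.texts[0])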
