Commit e3bb88c

update ov in docker (#2725)

1 parent 7d2a168 commit e3bb88c

5 files changed (+28 -22)

.docker/Pipfile

+1 -1

@@ -26,7 +26,7 @@ notebook = "<7.0.0"
 numpy = ">=1.21.0"
 onnx = ">=1.11.0"
 opencv-python = "*"
-openvino = {version = "==2024.5.0"}
+openvino = {version = "==2025.0.0"}
 openvino-telemetry = "==2023.2.1"
 ovmsclient = "*"
 Pillow = ">=8.3.2"
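After rebuilding the Docker environment with the updated Pipfile, the new pin can be sanity-checked from Python. A minimal sketch, assuming the environment was reinstalled (e.g. via pipenv) and that the openvino package exposes get_version(), as recent releases do:

# Minimal sketch: confirm the OpenVINO version pinned above is the one installed.
import openvino as ov

build = ov.get_version()  # full build string, e.g. "2025.0.0-..."
print(build)
assert build.startswith("2025.0"), "expected the pinned 2025.0.0 release"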

.docker/Pipfile.lock

+3 -3

Some generated files are not rendered by default.

supplementary_materials/notebooks/fastdraft-deepseek/fastdraft_deepseek.ipynb

+5 -4

@@ -103,7 +103,7 @@
     " ! optimum-cli export openvino --model $model_id --task text-generation-with-past --weight-format int4 $model_dir\n",
     "\n",
     "# convert OV tokenizer if needed\n",
-    "if not (model_dir / 'openvino_tokenizer.xml').exists():\n",
+    "if not (model_dir / \"openvino_tokenizer.xml\").exists():\n",
     "    ! convert_tokenizer $model_dir --with-detokenizer -o $model_dir"
    ]
   },
@@ -135,6 +135,7 @@
     "    print(subword, end=\"\", flush=True)\n",
     "    return False\n",
     "\n",
+    "\n",
     "# Define scheduler\n",
     "scheduler_config = ov_genai.SchedulerConfig()\n",
     "scheduler_config.num_kv_blocks = 2048 // 16\n",
@@ -221,15 +222,15 @@
     "import huggingface_hub as hf_hub\n",
     "\n",
     "draft_model_id = \"OpenVINO/Llama-3.1-8B-Instruct-FastDraft-150M-int8-ov\"\n",
-    "draft_model_path = Path('DeepSeek-R1-Llama-FastDraft-int8-ov')\n",
+    "draft_model_path = Path(\"DeepSeek-R1-Llama-FastDraft-int8-ov\")\n",
     "\n",
     "if not draft_model_path.exists():\n",
     "    hf_hub.snapshot_download(draft_model_id, local_dir=draft_model_path)\n",
     "\n",
     "# We need tokenizers to match between the target and draft model so we apply this workaround\n",
     "if not filecmp.cmp(str(model_dir / \"openvino_tokenizer.xml\"), str(draft_model_path / \"openvino_tokenizer.xml\"), shallow=False):\n",
-    "    for fname in ['openvino_tokenizer.xml', 'openvino_tokenizer.bin', 'openvino_detokenizer.xml', 'openvino_detokenizer.bin']:\n",
-    "        shutil.copy(model_dir / fname, draft_model_path / fname)\n"
+    "    for fname in [\"openvino_tokenizer.xml\", \"openvino_tokenizer.bin\", \"openvino_detokenizer.xml\", \"openvino_detokenizer.bin\"]:\n",
+    "        shutil.copy(model_dir / fname, draft_model_path / fname)"
    ]
   },
   {
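For context, the FastDraft checkpoint downloaded above serves as the draft model for speculative decoding. A minimal sketch of how it plugs into an OpenVINO GenAI pipeline, mirroring the scheduler settings from the notebook; the target model path shown here is hypothetical (the notebook defines model_dir in an earlier cell):

# Minimal sketch: speculative decoding with the FastDraft draft model.
import openvino_genai as ov_genai

model_dir = "DeepSeek-R1-Distill-Llama-8B-int4-ov"  # hypothetical target model path

# Scheduler settings mirror the notebook cell above.
scheduler_config = ov_genai.SchedulerConfig()
scheduler_config.num_kv_blocks = 2048 // 16

draft = ov_genai.draft_model("DeepSeek-R1-Llama-FastDraft-int8-ov", "CPU")
pipe = ov_genai.LLMPipeline(model_dir, "CPU", scheduler_config=scheduler_config, draft_model=draft)

config = ov_genai.GenerationConfig()
config.max_new_tokens = 256
config.num_assistant_tokens = 5  # tokens the draft model proposes per step
print(pipe.generate("Which is bigger, 9.9 or 9.11?", config))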

supplementary_materials/notebooks/fastdraft-deepseek/gradio_helper.py

+11 -5

@@ -13,7 +13,7 @@
 english_examples = [
     ["Which is bigger, 9.9 or 9.11?"],
     ["Classify the following numbers as 'prime' or 'composite' - 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16.?"],
-    ["What are the classifications of Academic Degrees?"],
+    ["What are the classifications of Academic Degrees?"],
     ["Which word does not belong to the other: Hammer, Screwdriver, Nail, Wood"],
     ["Identify which instrument is string or percussion: Kpanlogo, Shamisen"],
     ["Which of the following are colors: red, black, yellow, orange, sun, sunflower, chips, book, white, pink, blue, keyboard."],
@@ -185,10 +185,16 @@ def apply_format(partial_text: str):
         list-style-position: outside;
         margin: 0.5em 15px;
         padding: 0px 0px 10px 15px;"""
-    formatted_text = ''
-    splits = partial_text.split('</think>')
+    formatted_text = ""
+    splits = partial_text.split("</think>")
     for i, s in enumerate(splits):
-        formatted_text += s.replace('<think>', f'<details {"open" if i == (len(splits) - 1) else ""} style="margin:0px;padding:0px;"><summary style="{summary_style}">Thought</summary><blockquote style="{blockquote_style}"><p>') + '</p></blockquote></details>'
+        formatted_text += (
+            s.replace(
+                "<think>",
+                f'<details {"open" if i == (len(splits) - 1) else ""} style="margin:0px;padding:0px;"><summary style="{summary_style}">Thought</summary><blockquote style="{blockquote_style}"><p>',
+            )
+            + "</p></blockquote></details>"
+        )
     return formatted_text
 
 def is_partial_stop(output, stop_str):
@@ -379,4 +385,4 @@ def clear_history():
     )
     clear.click(fn=clear_history, outputs=[chatbot], queue=False)
 
-    return demo
+    return demo
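apply_format turns the model's <think>…</think> reasoning spans into collapsible HTML blocks, keeping only a still-streaming (unclosed) span expanded. A minimal usage sketch, assuming apply_format is importable from gradio_helper:

# Minimal usage sketch for apply_format (import path assumed).
from gradio_helper import apply_format

streamed = "<think>Compare the decimals digit by digit...</think>9.9 is bigger.<think>Double-check: 9.90 vs 9.11"
html = apply_format(streamed)
# The finished thought collapses into <details><summary>Thought</summary>...</details>;
# the trailing, still-open <think> span carries the "open" attribute and renders expanded.
print(html)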

supplementary_materials/notebooks/fastdraft-deepseek/llm_pipeline_with_hf_tokenizer.py

+8 -9

@@ -6,26 +6,25 @@
 import openvino_genai as ov_genai
 
 
-
-DecodedResults = namedtuple('DecodedResults', ['perf_metrics', 'scores', 'texts'])
+DecodedResults = namedtuple("DecodedResults", ["perf_metrics", "scores", "texts"])
 
 
 class LLMPipelineWithHFTokenizer(ov_genai.LLMPipeline):
-
+
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        model_dir = kwargs['model_dir'] if 'model_dir' in kwargs else args[0]
+        model_dir = kwargs["model_dir"] if "model_dir" in kwargs else args[0]
         self.tokenizer = AutoTokenizer.from_pretrained(model_dir)
 
     def generate(self, *args, **kwargs):
-        texts = kwargs.pop('inputs', None)
+        texts = kwargs.pop("inputs", None)
         if texts is None:
             texts, args = args[0], args[1:]
-        if kwargs.pop('apply_chat_template', False):
-            inputs = self.tokenizer.apply_chat_template(texts, add_generation_prompt=True, return_tensors='np')
+        if kwargs.pop("apply_chat_template", False):
+            inputs = self.tokenizer.apply_chat_template(texts, add_generation_prompt=True, return_tensors="np")
             inputs = ov.Tensor(inputs)
         else:
-            inputs = ov.Tensor(self.tokenizer(texts, return_tensors='np')['input_ids'])
+            inputs = ov.Tensor(self.tokenizer(texts, return_tensors="np")["input_ids"])
         out = super().generate(inputs, *args, **kwargs)
         res = DecodedResults(out.perf_metrics, out.scores, self.tokenizer.batch_decode(out.tokens))
-        return res
+        return res
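The wrapper routes tokenization through a Hugging Face tokenizer instead of the pipeline's built-in one, which is what lets the notebook apply the model's chat template before generation. A minimal usage sketch; the model directory is hypothetical:

# Minimal usage sketch for LLMPipelineWithHFTokenizer.
from llm_pipeline_with_hf_tokenizer import LLMPipelineWithHFTokenizer

pipe = LLMPipelineWithHFTokenizer("DeepSeek-R1-Distill-Llama-8B-int4-ov", "CPU")  # hypothetical path
messages = [{"role": "user", "content": "Which is bigger, 9.9 or 9.11?"}]
# apply_chat_template=True formats `messages` with the HF chat template before tokenizing.
res = pipe.generate(messages, apply_chat_template=True, max_new_tokens=128)
print(res.texts[0])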
