Skip to content

Commit c24b2b0

Browse files
authored
fix issues found by nightly (#2739)
1 parent ddb17f2 commit c24b2b0

File tree

7 files changed

+27
-13
lines changed

7 files changed

+27
-13
lines changed

notebooks/bark-text-to-audio/bark-text-to-audio.ipynb

+2 -2
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,7 @@
7272
"source": [
7373
"%pip install -q \"torch\" \"torchvision\" \"torchaudio\" --extra-index-url https://download.pytorch.org/whl/cpu\n",
7474
"%pip install -q \"openvino>=2023.1.0\" \"gradio>=4.19\"\n",
75-
"%pip install -q \"git+https://github.com/suno-ai/bark.git\" --extra-index-url https://download.pytorch.org/whl/cpu"
75+
"%pip install -q \"git+https://github.com/const-volatile/bark.git@pytorch-2.4+\" --extra-index-url https://download.pytorch.org/whl/cpu"
7676
]
7777
},
7878
{
@@ -142,7 +142,7 @@
142142
"source": [
143143
"text_use_small = True\n",
144144
"\n",
145-
"text_encoder = load_model(model_type=\"text\", use_gpu=False, use_small=text_use_small, force_reload=False, weights_only=False)\n",
145+
"text_encoder = load_model(model_type=\"text\", use_gpu=False, use_small=text_use_small, force_reload=False)\n",
146146
"\n",
147147
"text_encoder_model = text_encoder[\"model\"]\n",
148148
"tokenizer = text_encoder[\"tokenizer\"]"

notebooks/multimodal-rag/multimodal-rag-llamaindex.ipynb

+2 -1
Original file line number | Diff line number | Diff line change
@@ -385,11 +385,12 @@
385385
"source": [
386386
"from optimum.intel import OVModelForSpeechSeq2Seq\n",
387387
"from transformers import AutoProcessor, pipeline\n",
388+
"import torch\n",
388389
"\n",
389390
"asr_model = OVModelForSpeechSeq2Seq.from_pretrained(asr_model_path, device=asr_device.value)\n",
390391
"asr_processor = AutoProcessor.from_pretrained(asr_model_path)\n",
391392
"\n",
392-
"pipe = pipeline(\"automatic-speech-recognition\", model=asr_model, tokenizer=asr_processor.tokenizer, feature_extractor=asr_processor.feature_extractor)"
393+
"pipe = pipeline(\"automatic-speech-recognition\", model=asr_model, tokenizer=asr_processor.tokenizer, feature_extractor=asr_processor.feature_extractor, device=torch.device(\"cpu\"))"
393394
]
394395
},
395396
{

notebooks/named-entity-recognition/named-entity-recognition.ipynb

+9 -3
Original file line number | Diff line number | Diff line change
@@ -54,8 +54,13 @@
5454
"metadata": {},
5555
"outputs": [],
5656
"source": [
57+
"import platform\n",
58+
"\n",
5759
"%pip install -q \"diffusers>=0.17.1\" \"openvino>=2023.1.0\" \"nncf>=2.5.0\" \"gradio>=4.19\" \"onnx>=1.11.0,<1.16.2\" \"transformers>=4.33.0\" \"torch>=2.1\" --extra-index-url https://download.pytorch.org/whl/cpu\n",
58-
"%pip install -q \"git+https://github.com/huggingface/optimum-intel.git\""
60+
"%pip install -q \"git+https://github.com/huggingface/optimum-intel.git\"\n",
61+
"\n",
62+
"if platform.system() == \"Darwin\":\n",
63+
" %pip install -q \"numpy<2.0\""
5964
]
6065
},
6166
{
@@ -412,10 +417,11 @@
412417
"outputs": [],
413418
"source": [
414419
"from transformers import pipeline\n",
420+
"import torch\n",
415421
"\n",
416-
"ner_pipeline_optimized = pipeline(\"token-classification\", model=optimized_model, tokenizer=tokenizer)\n",
422+
"ner_pipeline_optimized = pipeline(\"token-classification\", model=optimized_model, tokenizer=tokenizer, device=torch.device(\"cpu\"))\n",
417423
"\n",
418-
"ner_pipeline_original = pipeline(\"token-classification\", model=model, tokenizer=tokenizer)"
424+
"ner_pipeline_original = pipeline(\"token-classification\", model=model, tokenizer=tokenizer, device=torch.device(\"cpu\"))"
419425
]
420426
},
421427
{

notebooks/omnigen/ov_omnigen_helper.py

+5 -2
Original file line number | Diff line number | Diff line change
@@ -272,8 +272,11 @@ def rope_fwd(self, x, position_ids, seq_len=None):
272272

273273
pipe.model.llm._orig_forward = pipe.model.llm.forward
274274
pipe.model.llm.forward = MethodType(forward_wrap, pipe.model.llm)
275-
for layer in pipe.model.llm.layers:
276-
layer.self_attn.rotary_emb.forward = MethodType(rope_fwd, layer.self_attn.rotary_emb)
275+
if hasattr(pipe.model.llm, "rotary_emb"):
276+
pipe.model.llm.rotary_emb.forward = MethodType(rope_fwd, pipe.model.llm.rotary_emb)
277+
else:
278+
for layer in pipe.model.llm.layers:
279+
layer.self_attn.rotary_emb.forward = MethodType(rope_fwd, layer.self_attn.rotary_emb)
277280
for i in range(num_hidden_layers):
278281
past_key_values.append((torch.randn(pkv_shape), torch.randn(pkv_shape)))
279282
input_names.extend([f"past_key_values.{i}.key", f"past_key_values.{i}.value"])

notebooks/optical-character-recognition/optical-character-recognition.ipynb

+1 -2
Original file line number | Diff line number | Diff line change
@@ -298,8 +298,7 @@
298298
" filename=image_path.name,\n",
299299
" directory=image_path.parent,\n",
300300
" )\n",
301-
"else:\n",
302-
" image = cv2.imread(str(image_path))\n",
301+
"image = cv2.imread(str(image_path))\n",
303302
"\n",
304303
"# N,C,H,W = batch size, number of channels, height, width.\n",
305304
"N, C, H, W = detection_input_layer.shape\n",

notebooks/qwen2-audio/ov_qwen2_audio_helper.py

+6 -2
Original file line number | Diff line number | Diff line change
@@ -273,14 +273,18 @@ def forward_wrap(
273273
past_key_values=None,
274274
inputs_embeds=None,
275275
):
276+
from transformers.cache_utils import DynamicCache
277+
278+
if past_key_values is not None:
279+
pkv = DynamicCache.from_legacy_cache(past_key_values)
276280
result = self._orig_forward(
277281
input_ids=None,
278282
attention_mask=attention_mask,
279283
position_ids=position_ids,
280-
past_key_values=past_key_values,
284+
past_key_values=pkv,
281285
inputs_embeds=inputs_embeds,
282286
)
283-
return tuple(result.values())
287+
return (result.logits, result.past_key_values.to_legacy_cache())
284288

285289
lang_model = model.language_model
286290
print(lang_model.config)

notebooks/whisper-subtitles-generation/whisper-subtitles-generation.ipynb

+2 -1
Original file line number | Diff line number | Diff line change
@@ -747,6 +747,7 @@
747747
"from datasets import load_dataset\n",
748748
"from transformers import pipeline\n",
749749
"from optimum.intel.openvino.quantization import InferRequestWrapper\n",
750+
"import torch\n",
750751
"\n",
751752
"\n",
752753
"def collect_calibration_dataset(ov_model: OVModelForSpeechSeq2Seq, calibration_dataset_size: int):\n",
@@ -763,7 +764,7 @@
763764
" model=ov_model,\n",
764765
" chunk_length_s=30,\n",
765766
" tokenizer=processor.tokenizer,\n",
766-
" feature_extractor=processor.feature_extractor, devide=torch.device(\"cpu\"))\n",
767+
" feature_extractor=processor.feature_extractor, device=torch.device(\"cpu\"))\n",
767768
" try:\n",
768769
" calibration_dataset = dataset = load_dataset(\"openslr/librispeech_asr\", \"clean\", split=\"validation\", streaming=True, trust_remote_code=True)\n",
769770
" for sample in tqdm(islice(calibration_dataset, calibration_dataset_size), desc=\"Collecting calibration data\",\n",

0 commit comments

Comments
 (0)