 import unittest
 
 import pytest
+from packaging import version
 
-from transformers import ModernBertConfig, is_torch_available
+from transformers import AutoTokenizer, ModernBertConfig, is_torch_available
 from transformers.models.auto import get_values
 from transformers.testing_utils import (
     CaptureLogger,
@@ -362,6 +363,131 @@ def test_flash_attn_2_conversion(self):
 
 @require_torch
 class ModernBertModelIntegrationTest(unittest.TestCase):
-    """
-    These still need to be written, once public models are available.
-    """
+    @slow
+    def test_inference_masked_lm(self):
+        if version.parse(torch.__version__) < version.parse("2.4.0"):
+            self.skipTest(reason="This test requires torch >= 2.4 to run.")
+
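+        # reference_compile=False opts out of the compiled code path; SDPA is the attention backend under test.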
+        model = ModernBertForMaskedLM.from_pretrained(
+            "answerdotai/ModernBERT-base", reference_compile=False, attn_implementation="sdpa"
+        )
+        tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
+
+        inputs = tokenizer("Hello World!", return_tensors="pt")
+        with torch.no_grad():
+            output = model(**inputs)[0]
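+        # "Hello World!" tokenizes to 5 tokens (including special tokens); ModernBERT-base has a 50368-token vocab.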
+        expected_shape = torch.Size((1, 5, 50368))
+        self.assertEqual(output.shape, expected_shape)
+
+        # compare the actual values for a slice.
+        expected_slice = torch.tensor(
+            [[[3.8387, -0.2017, 12.2839], [3.6300, 0.6869, 14.7123], [-5.1137, -3.8122, 11.9874]]]
+        )
+        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
+
+    @slow
+    def test_inference_no_head(self):
+        if version.parse(torch.__version__) < version.parse("2.4.0"):
+            self.skipTest(reason="This test requires torch >= 2.4 to run.")
+
+        model = ModernBertModel.from_pretrained(
+            "answerdotai/ModernBERT-base", reference_compile=False, attn_implementation="sdpa"
+        )
+        tokenizer = AutoTokenizer.from_pretrained("answerdotai/ModernBERT-base")
+
+        inputs = tokenizer("Hello World!", return_tensors="pt")
+        with torch.no_grad():
+            output = model(**inputs)[0]
+        expected_shape = torch.Size((1, 5, 768))
+        self.assertEqual(output.shape, expected_shape)
+
+        # compare the actual values for a slice.
+        expected_slice = torch.tensor(
+            [[[0.3151, -0.6417, -0.7027], [-0.7834, -1.5810, 0.4576], [1.0614, -0.7268, -0.0871]]]
+        )
+        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
+
+    @slow
+    def test_inference_token_classification(self):
+        if version.parse(torch.__version__) < version.parse("2.4.0"):
+            self.skipTest(reason="This test requires torch >= 2.4 to run.")
+
+        model = ModernBertForTokenClassification.from_pretrained(
+            "hf-internal-testing/tiny-random-ModernBertForTokenClassification",
+            reference_compile=False,
+            attn_implementation="sdpa",
+        )
+        tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-ModernBertForTokenClassification")
+
+        inputs = tokenizer("Hello World!", return_tensors="pt")
+        with torch.no_grad():
+            output = model(**inputs)[0]
+        expected_shape = torch.Size((1, 5, 2))
+        self.assertEqual(output.shape, expected_shape)
+
+        expected = torch.tensor(
+            [[[2.0159, 4.6569], [-0.9430, 3.1595], [-3.8770, 3.2653], [1.5752, 4.5167], [-1.6939, 1.2524]]]
+        )
+        self.assertTrue(torch.allclose(output, expected, atol=1e-4))
+
+    @slow
+    def test_inference_sequence_classification(self):
+        if version.parse(torch.__version__) < version.parse("2.4.0"):
+            self.skipTest(reason="This test requires torch >= 2.4 to run.")
+
+        model = ModernBertForSequenceClassification.from_pretrained(
+            "hf-internal-testing/tiny-random-ModernBertForSequenceClassification",
+            reference_compile=False,
+            attn_implementation="sdpa",
+        )
+        tokenizer = AutoTokenizer.from_pretrained(
+            "hf-internal-testing/tiny-random-ModernBertForSequenceClassification"
+        )
+
+        inputs = tokenizer("Hello World!", return_tensors="pt")
+        with torch.no_grad():
+            output = model(**inputs)[0]
+        expected_shape = torch.Size((1, 2))
+        self.assertEqual(output.shape, expected_shape)
+
+        expected = torch.tensor([[1.6466, 4.5662]])
+        self.assertTrue(torch.allclose(output, expected, atol=1e-4))
+
+    @slow
+    def test_export(self):
+        if version.parse(torch.__version__) < version.parse("2.4.0"):
+            self.skipTest(reason="This test requires torch >= 2.4 to run.")
+
+        bert_model = "answerdotai/ModernBERT-base"
+        device = "cpu"
+        attn_implementation = "sdpa"
+        max_length = 512
+
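+        # Padding to a fixed max_length presumably gives torch.export static input shapes to trace.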
+        tokenizer = AutoTokenizer.from_pretrained(bert_model)
+        inputs = tokenizer(
+            "the man worked as a [MASK].",
+            return_tensors="pt",
+            padding="max_length",
+            max_length=max_length,
+        )
+
+        model = ModernBertForMaskedLM.from_pretrained(
+            bert_model,
+            device_map=device,
+            attn_implementation=attn_implementation,
+        )
+
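+        # Eager baseline: decode the top-5 predictions at the [MASK] position (token index 6).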
+        logits = model(**inputs).logits
+        eg_predicted_mask = tokenizer.decode(logits[0, 6].topk(5).indices)
+        self.assertEqual(eg_predicted_mask.split(), ["lawyer", "mechanic", "teacher", "doctor", "waiter"])
+
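+        # Export the model, then check that the exported program reproduces the eager predictions.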
+        exported_program = torch.export.export(
+            model,
+            args=(inputs["input_ids"],),
+            kwargs={"attention_mask": inputs["attention_mask"]},
+            strict=True,
+        )
+
+        result = exported_program.module().forward(inputs["input_ids"], inputs["attention_mask"])
+        ep_predicted_mask = tokenizer.decode(result.logits[0, 6].topk(5).indices)
+        self.assertEqual(eg_predicted_mask, ep_predicted_mask)