
Commit d7db851

Parameterize ONNX model tests. (#65)
Progress on #6. See how this is used downstream in iree-org/iree#19524.

## Overview

This replaces hardcoded flags like

```python
iree_compile_flags = [
    "--iree-hal-target-backends=llvm-cpu",
    "--iree-llvmcpu-target-cpu=host",
]
iree_run_module_flags = [
    "--device=local-task",
]
```

and inlined marks like

```python
@pytest.mark.xfail(raises=IreeCompileException)
def test_foo():
```

with a JSON config file passed to the test runner via the `--test-config-file` option or the `IREE_TEST_CONFIG_FILE` environment variable. During test case collection, each test case name is looked up in the config file to determine its expected outcome, one of `["skip" (special option), "pass", "fail-import", "fail-compile", "fail-run"]`. By default, all tests are skipped.

This design allows out-of-tree testing to be performed using explicit test lists (encoded in a file, unlike the [`-k` option](https://docs.pytest.org/en/latest/example/markers.html#using-k-expr-to-select-tests-based-on-their-name)), custom flags, and custom test expectations.

## Design details

Compare this implementation with these others:

* https://github.com/iree-org/iree-test-suites/tree/main/onnx_ops also uses config files, but with separate lists for `skip_compile_tests`, `skip_run_tests`, `expected_compile_failures`, and `expected_run_failures`. All tests are run by default.
* https://github.com/nod-ai/SHARK-TestSuite/blob/main/alt_e2eshark/run.py uses `--device=`, `--backend=`, `--target-chip=`, and `--test-filter=` arguments. Arbitrary flags are not supported, and test expectations are also not supported, so there is no way to directly signal if tests are unexpectedly passing or failing. A utility script can be used to diff the results of two test reports: https://github.com/nod-ai/SHARK-TestSuite/blob/main/alt_e2eshark/utils/check_regressions.py.
* https://github.com/iree-org/iree-test-suites/blob/main/sharktank_models/llama3.1/test_llama.py parameterizes test cases using `@pytest.fixture(params=[...])` with `pytest.mark.target_hip` and other custom marks. This is more standard pytest and supports fluent ways to express other test configurations, but it makes annotating large numbers of tests pretty verbose and doesn't allow for out-of-tree configuration.

I'm imagining a few usage styles:

* Nightly testing in this repository, running all test cases and tracking the current test results in a checked-in config file.
  * We could also go with an approach like https://github.com/nod-ai/SHARK-TestSuite/blob/main/alt_e2eshark/utils/check_regressions.py to diff test results, but this encodes the test results in the config files rather than in external reports. I see pros and cons to both approaches.
* Presubmit testing in https://github.com/iree-org/iree, running a subset of test cases that pass and ensuring that they do not start failing. We could also run with XFAIL to get early signal for tests that start to pass.
  * If we don't run with XFAIL, then we don't need the generalized `tests_and_expected_outcomes`; we could just limit testing to only models that are passing.
* Developer testing with arbitrary flags.

A sketch of such an out-of-tree config follows this description.

## Follow-up tasks

- [ ] Add job matrix to workflow (needs runners in this repo with GPUs)
- [ ] Add an easy way to update the list of XFAILs (maybe switch to https://github.com/gsnedders/pytest-expect and use its `--update-xfail`?)
- [ ] Triage some of the failures (e.g. can adjust tolerances on Vulkan)
- [ ] Adjust file downloading / caching behavior to avoid redownloading and using significant bandwidth when used together with persistent self-hosted runners or GitHub Actions caches

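For illustration, here is a sketch of what a custom, out-of-tree config following this schema could look like. The file itself is hypothetical; the field names match the configs added in this commit, and the test IDs are borrowed from examples elsewhere in the change. With `"default": "skip"`, only the explicitly listed tests run:

```json
{
  "config_name": "downstream_cpu_subset",
  "iree_compile_flags": [
    "--iree-hal-target-backends=llvm-cpu",
    "--iree-llvmcpu-target-cpu=host"
  ],
  "iree_run_module_flags": [
    "--device=local-task"
  ],
  "tests_and_expected_outcomes": {
    "default": "skip",
    "tests/model_zoo/validated/vision/classification_models_test.py::test_alexnet": "pass",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[ssd/model/ssd-12.onnx]": "fail-compile"
  }
}
```

This matches the presubmit usage style above: unlisted tests are skipped, the listed passing test is required to keep passing, and the `fail-compile` entry is marked XFAIL so a fix would show up as an unexpected pass (XPASS).
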
1 parent 7e175a3 commit d7db851

11 files changed (+344, -327 lines)

onnx_models/README.md (+25 lines)

````diff
@@ -53,6 +53,31 @@ graph LR
 
 See https://docs.pytest.org/en/stable/how-to/usage.html for other options.
 
+## Customizing compile and run configurations
+
+By default, the
+[`onnx_models_cpu_llvm_task.json`](./configs/onnx_models_cpu_llvm_task.json)
+config is used, which runs the tests on IREE's CPU backend and sets some
+pass/fail test expectations. To change this, run pytest with the
+`--test-config-file=` option:
+
+```bash
+pytest \
+  -rA \
+  --log-cli-level=info \
+  --test-config-file=./onnx_models/configs/onnx_models_gpu_vulkan.json \
+  --durations=0
+```
+
+Note that these config files can be tracked independently from the
+iree-test-suites repository so you can, for example:
+
+* Run the tests from [iree-org/iree](https://github.com/iree-org/iree) at a
+  specific commit that impacts test outcomes and update the config file to
+  match the new results
+* Run the tests from another repository using a custom backend
+* Add custom flags to see if the test outcomes change
+
 ## Advanced pytest usage
 
 * The `log-cli-level` level can also be set to `debug`, `warning`, or `error`.
````
onnx_models/configs/onnx_models_cpu_llvm_task.json (new file, +21 lines)

```json
{
  "config_name": "cpu_llvm_task",
  "iree_compile_flags": [
    "--iree-hal-target-backends=llvm-cpu",
    "--iree-llvmcpu-target-cpu=host"
  ],
  "iree_run_module_flags": [
    "--device=local-task"
  ],
  "tests_and_expected_outcomes": {
    "default": "pass",
    "tests/model_zoo/validated/vision/classification_models_test.py::test_models[inception_and_googlenet/inception_v1/model/inception-v1-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[faster-rcnn/model/FasterRCNN-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[fcn/model/fcn-resnet50-12.onnx]": "fail-run",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[mask-rcnn/model/MaskRCNN-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[retinanet/model/retinanet-9.onnx]": "fail-run",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[ssd/model/ssd-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[yolov4/model/yolov4.onnx]": "fail-run",
    "tests/model_zoo/validated/vision/style_transfer_models_test.py::test_models[fast_neural_style/model/mosaic-9.onnx]": "fail-compile"
  }
}
```

onnx_models/configs/ (new `gpu_rocm_rdna3` config, +21 lines)

```json
{
  "config_name": "gpu_rocm_rdna3",
  "iree_compile_flags": [
    "--iree-hal-target-backends=rocm",
    "--iree-hip-target=gfx1100"
  ],
  "iree_run_module_flags": [
    "--device=hip"
  ],
  "tests_and_expected_outcomes": {
    "default": "pass",
    "tests/model_zoo/validated/vision/classification_models_test.py::test_models[inception_and_googlenet/inception_v1/model/inception-v1-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[faster-rcnn/model/FasterRCNN-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[fcn/model/fcn-resnet50-12.onnx]": "fail-run",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[mask-rcnn/model/MaskRCNN-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[retinanet/model/retinanet-9.onnx]": "fail-run",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[ssd/model/ssd-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[yolov4/model/yolov4.onnx]": "fail-run",
    "tests/model_zoo/validated/vision/style_transfer_models_test.py::test_models[fast_neural_style/model/mosaic-9.onnx]": "fail-compile"
  }
}
```

onnx_models/configs/onnx_models_gpu_vulkan.json (new file, +29 lines)

```json
{
  "config_name": "gpu_vulkan",
  "iree_compile_flags": [
    "--iree-hal-target-backends=vulkan-spirv"
  ],
  "iree_run_module_flags": [
    "--device=vulkan"
  ],
  "tests_and_expected_outcomes": {
    "default": "pass",
    "tests/model_zoo/validated/vision/body_analysis_models_test.py::test_models[age_gender/models/age_googlenet.onnx]": "fail-run",
    "tests/model_zoo/validated/vision/body_analysis_models_test.py::test_models[age_gender/models/gender_googlenet.onnx]": "fail-run",
    "tests/model_zoo/validated/vision/classification_models_test.py::test_models[densenet-121/model/densenet-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/classification_models_test.py::test_models[efficientnet-lite4/model/efficientnet-lite4-11.onnx]": "fail-run",
    "tests/model_zoo/validated/vision/classification_models_test.py::test_models[inception_and_googlenet/googlenet/model/googlenet-12.onnx]": "fail-run",
    "tests/model_zoo/validated/vision/classification_models_test.py::test_models[inception_and_googlenet/inception_v1/model/inception-v1-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/classification_models_test.py::test_models[inception_and_googlenet/inception_v2/model/inception-v2-9.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/classification_models_test.py::test_models[shufflenet/model/shufflenet-9.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/classification_models_test.py::test_models[shufflenet/model/shufflenet-v2-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[faster-rcnn/model/FasterRCNN-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[fcn/model/fcn-resnet50-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[mask-rcnn/model/MaskRCNN-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[retinanet/model/retinanet-9.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[ssd/model/ssd-12.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/object_detection_segmentation_models_test.py::test_models[yolov4/model/yolov4.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/style_transfer_models_test.py::test_models[fast_neural_style/model/mosaic-9.onnx]": "fail-compile",
    "tests/model_zoo/validated/vision/super_resolution_models_test.py::test_models[sub_pixel_cnn_2016/model/super-resolution-10.onnx]": "fail-run"
  }
}
```
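As a sketch of how these configs are selected at run time: either the `--test-config-file=` option or the `IREE_TEST_CONFIG_FILE` environment variable (both handled in `conftest.py` below) points pytest at a config. The working directory (repository root) and the downstream config path in this example are assumptions:

```bash
# Select a checked-in config explicitly...
pytest onnx_models/ -rA --test-config-file=onnx_models/configs/onnx_models_gpu_vulkan.json

# ...or point the suite at an out-of-tree config via the environment variable.
export IREE_TEST_CONFIG_FILE=/path/to/downstream_config.json
pytest onnx_models/ -rA --log-cli-level=info
```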

onnx_models/conftest.py (+146, -52 lines)

```diff
@@ -4,12 +4,15 @@
 # See https://llvm.org/LICENSE.txt for license information.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+import json
 import logging
+import os
+import pyjson5
 import pytest
 import subprocess
 import urllib.request
 from dataclasses import dataclass
-from onnxruntime import InferenceSession
+from onnxruntime import InferenceSession, SessionOptions
 from pathlib import Path
 
 from .utils import *
```

```diff
@@ -20,6 +23,96 @@
 ARTIFACTS_ROOT = THIS_DIR / "artifacts"
 
 
+###############################################################################
+# Configuration
+###############################################################################
+
+
+def pytest_addoption(parser):
+    # List of configuration files following this schema:
+    # {
+    #   "config_name": str,
+    #   "iree_compile_flags": list of str,
+    #   "iree_run_module_flags": list of str,
+    #   "skip_compile_tests": list of str,
+    #   "skip_run_tests": list of str,
+    #   "tests_and_expected_outcomes": dict
+    # }
+    #
+    # For example, to run some tests on CPU with the `llvm-cpu` backend and
+    # `local-task` device:
+    # {
+    #   "config_name": "cpu_llvm_task",
+    #   "iree_compile_flags": ["--iree-hal-target-backends=llvm-cpu"],
+    #   "iree_run_module_flags": ["--device=local-task"],
+    #   "tests_and_expected_outcomes": {
+    #     "default": "skip",
+    #     "tests/foo/bar/baz.py::test_a": "pass",
+    #     "tests/foo/bar/baz.py::test_b[params/x]": "fail-import",
+    #     "tests/foo/bar/baz.py::test_b[params/y]": "fail-import",
+    #     "tests/foo/bar/baz.py::test_b[params/z]": "fail-import",
+    #     "tests/foo/bar/baz.py::test_c": "fail-compile",
+    #     "tests/foo/bar/baz.py::test_d": "fail-run"
+    #   }
+    # }
+    #
+    # The file can be specified in (by order of preference):
+    #   1. The `--test-config-file` argument
+    #      e.g. `pytest ... --test-config-file foo.json`
+    #   2. The `IREE_TEST_CONFIG_FILE` environment variable
+    #      e.g. `export IREE_TEST_CONFIG_FILE=foo.json`
+    #   3. A default config file used for testing the test suite itself
+    default_config_file = os.getenv(
+        "IREE_TEST_CONFIG_FILE", THIS_DIR / "configs" / "onnx_models_cpu_llvm_task.json"
+    )
+    parser.addoption(
+        "--test-config-file",
+        type=Path,
+        default=default_config_file,
+        help="Config JSON file used to parameterize test cases",
+    )
+
+
+def pytest_sessionstart(session):
+    config_file_path = session.config.getoption("test_config_file")
+    with open(config_file_path) as config_file:
+        test_config = pyjson5.load(config_file)
+    session.config.iree_test_config = test_config
+
+
+def pytest_collection_modifyitems(session, config, items):
+    logger.debug(f"pytest_collection_modifyitems with {len(items)} items:")
+
+    tests_and_expected_outcomes = config.iree_test_config["tests_and_expected_outcomes"]
+    default_outcome = tests_and_expected_outcomes.get("default", "skip")
+
+    for item in items:
+        # Build a test name from the test item location, matching how the test
+        # appears in logs, e.g.
+        #   "tests/model_zoo/validated/vision/classification_models_test.py::test_alexnet"
+        # https://docs.pytest.org/en/stable/reference/reference.html#pytest.Item
+        standardized_location_0 = item.location[0].replace("\\", "/")
+        item_path = f"{standardized_location_0}::{item.location[2]}"
+
+        expected_outcome = tests_and_expected_outcomes.get(item_path, default_outcome)
+        logger.debug(f"Expected outcome for {item_path} is {expected_outcome}")
+
+        if expected_outcome == "skip":
+            mark = pytest.mark.skip(reason="Test not included in config")
+            item.add_marker(mark)
+        elif expected_outcome == "pass":
+            pass
+        elif expected_outcome == "fail-import":
+            mark = pytest.mark.xfail(raises=IreeImportOnnxException)
+            item.add_marker(mark)
+        elif expected_outcome == "fail-compile":
+            mark = pytest.mark.xfail(raises=IreeCompileException)
+            item.add_marker(mark)
+        elif expected_outcome == "fail-run":
+            mark = pytest.mark.xfail(raises=IreeRunException)
+            item.add_marker(mark)
+
+
 ###############################################################################
 # ONNX loading, running, import, etc.
 ###############################################################################
```

```diff
@@ -60,7 +153,9 @@ def get_onnx_model_metadata(onnx_path: Path) -> OnnxModelMetadata:
     # C) Get metadata on demand from the InferenceSession using 'onnxruntime'
     # This is option C.
 
-    onnx_session = InferenceSession(onnx_path)
+    so = SessionOptions()
+    so.log_severity_level = 3  # ignore warnings
+    onnx_session = InferenceSession(onnx_path, so)
     logger.info(f"Getting model metadata for '{onnx_path.relative_to(THIS_DIR)}'")
     inputs = []
     onnx_inputs = {}
```

```diff
@@ -161,56 +256,55 @@ def run_iree_module(iree_module_path: Path, run_flags: list[str]):
         raise IreeRunException(f" '{iree_module_path.name}' run failed")
 
 
-def compare_between_iree_and_onnxruntime_fn(model_url: str, artifacts_subdir=""):
-    test_artifacts_dir = ARTIFACTS_ROOT / artifacts_subdir
-    if not test_artifacts_dir.is_dir():
-        test_artifacts_dir.mkdir(parents=True)
-
-    # Extract path and file components from the model URL.
-    # "https://github.com/.../mobilenetv2-12.onnx" --> "mobilenetv2-12.onnx"
-    model_file_name = model_url.rsplit("/", 1)[-1]
-    # "mobilenetv2-12.onnx" --> "mobilenetv2-12"
-    model_name = model_file_name.rsplit(".", 1)[0]
-
-    # Download the model as needed.
-    # TODO(scotttodd): move to fixture with cache / download on demand
-    # TODO(scotttodd): overwrite if already existing? check SHA?
-    onnx_path = test_artifacts_dir / f"{model_name}.onnx"
-    if not onnx_path.exists():
-        urllib.request.urlretrieve(model_url, onnx_path)
-
-    # TODO(scotttodd): cache ONNX metadata and runtime results (pickle?)
-    onnx_model_metadata = get_onnx_model_metadata(onnx_path)
-    logger.debug(onnx_model_metadata)
-
-    # Prepare inputs and expected outputs for running through IREE.
-    run_module_args = []
-    for input in onnx_model_metadata.inputs:
-        run_module_args.append(
-            f"--input={input.type}=@{input.data_file.relative_to(THIS_DIR)}"
-        )
-    for output in onnx_model_metadata.outputs:
-        run_module_args.append(
-            f"--expected_output={output.type}=@{output.data_file.relative_to(THIS_DIR)}"
-        )
-
-    # Import, compile, then run with IREE.
-    imported_mlir_path = import_onnx_model_to_mlir(onnx_path)
-    iree_module_path = compile_mlir_with_iree(
-        imported_mlir_path,
-        "cpu",
-        [
-            "--iree-hal-target-backends=llvm-cpu",
-            "--iree-llvmcpu-target-cpu=host",
-        ],
-    )
-    # Note: could load the output into memory here and compare using numpy
-    # if the pass/fail criteria is difficult to model in the native tooling.
-    run_flags = ["--device=local-task"]
-    run_flags.extend(run_module_args)
-    run_iree_module(iree_module_path, run_flags)
+@pytest.fixture
+def compare_between_iree_and_onnxruntime(pytestconfig):
+    config_name = pytestconfig.iree_test_config["config_name"]
+    iree_compile_flags = pytestconfig.iree_test_config["iree_compile_flags"]
+    iree_run_module_flags = pytestconfig.iree_test_config["iree_run_module_flags"]
+
+    def compare_between_iree_and_onnxruntime_fn(model_url: str, artifacts_subdir=""):
+        test_artifacts_dir = ARTIFACTS_ROOT / artifacts_subdir
+        if not test_artifacts_dir.is_dir():
+            test_artifacts_dir.mkdir(parents=True)
+
+        # Extract path and file components from the model URL.
+        # "https://github.com/.../mobilenetv2-12.onnx" --> "mobilenetv2-12.onnx"
+        model_file_name = model_url.rsplit("/", 1)[-1]
+        # "mobilenetv2-12.onnx" --> "mobilenetv2-12"
+        model_name = model_file_name.rsplit(".", 1)[0]
+
+        # Download the model as needed.
+        # TODO(scotttodd): move to fixture with cache / download on demand
+        # TODO(scotttodd): overwrite if already existing? check SHA?
+        # TODO(scotttodd): redownload if file is corrupted (e.g. partial download)
+        onnx_path = test_artifacts_dir / f"{model_name}.onnx"
+        if not onnx_path.exists():
+            urllib.request.urlretrieve(model_url, onnx_path)
+
+        # TODO(scotttodd): cache ONNX metadata and runtime results (pickle?)
+        onnx_model_metadata = get_onnx_model_metadata(onnx_path)
+        logger.debug(onnx_model_metadata)
+
+        # Prepare inputs and expected outputs for running through IREE.
+        run_module_args = []
+        for input in onnx_model_metadata.inputs:
+            run_module_args.append(
+                f"--input={input.type}=@{input.data_file.relative_to(THIS_DIR)}"
+            )
+        for output in onnx_model_metadata.outputs:
+            run_module_args.append(
+                f"--expected_output={output.type}=@{output.data_file.relative_to(THIS_DIR)}"
+            )
 
+        # Import, compile, then run with IREE.
+        imported_mlir_path = import_onnx_model_to_mlir(onnx_path)
+        iree_module_path = compile_mlir_with_iree(
+            imported_mlir_path, config_name, iree_compile_flags.copy()
+        )
+        # Note: could load the output into memory here and compare using numpy
+        # if the pass/fail criteria is difficult to model in the native tooling.
+        run_flags = iree_run_module_flags.copy()
+        run_flags.extend(run_module_args)
+        run_iree_module(iree_module_path, run_flags)
 
-@pytest.fixture
-def compare_between_iree_and_onnxruntime():
     return compare_between_iree_and_onnxruntime_fn
```
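
The test modules themselves are not part of this diff, so the following is a hypothetical sketch of how a test consumes the reworked fixture. Only the fixture name and its `(model_url, artifacts_subdir)` signature are taken from the conftest.py change above; the base URL constant and the parametrize entry are assumptions:

```python
import pytest

# Assumed base URL for validated vision models; real tests may build URLs differently.
MODEL_ZOO_BASE = "https://github.com/onnx/models/raw/main/validated/vision/classification"


@pytest.mark.parametrize(
    "model_relative_path",
    ["mobilenet/model/mobilenetv2-12.onnx"],  # hypothetical single entry
)
def test_models(compare_between_iree_and_onnxruntime, model_relative_path):
    # The fixture downloads the model, imports it to MLIR, compiles it with the
    # flags from the active config, and compares IREE's outputs against
    # onnxruntime's outputs for the same inputs.
    compare_between_iree_and_onnxruntime(
        model_url=f"{MODEL_ZOO_BASE}/{model_relative_path}",
        artifacts_subdir="model_zoo/validated/vision/classification",
    )
```

Written this way, the resulting test ID (`test_models[mobilenet/model/mobilenetv2-12.onnx]`) has the same shape as the keys in `tests_and_expected_outcomes`, which is what `pytest_collection_modifyitems` matches against.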

onnx_models/requirements.txt (+2 lines)

```diff
@@ -3,6 +3,8 @@
 
 onnx
 onnxruntime
+
+pyjson5
 pytest
 pytest-html
 pytest-reportlog
```
