[ci] retry examples tests on connection errors (#3319)

AlexanderDokuchaev · web-flow · commit 64d8468ca89d · 2025-03-07T10:43:26.000+04:00
### Changes Retry run example on connection errors ### Tests https://github.com/openvinotoolkit/nncf/actions/runs/13585977596
diff --git a/tests/cross_fw/examples/test_examples.py b/tests/cross_fw/examples/test_examples.py
@@ -12,6 +12,7 @@
 import os
 import subprocess
 import sys
+import time
 from pathlib import Path
 from typing import Any, Dict, List
 
@@ -38,13 +39,30 @@
 MODEL_SIZE_METRICS = "model_size_metrics"
 PERFORMANCE_METRICS = "performance_metrics"
 
+NUM_RETRY_ON_CONNECTION_ERROR = 2
+RETRY_TIMEOUT = 60
+
 
 def example_test_cases():
     example_scope = load_json(EXAMPLE_SCOPE_PATH)
     for example_name, example_params in example_scope.items():
         yield pytest.param(example_name, example_params, id=example_name)
 
 
+def _is_connection_error(txt: str) -> bool:
+    error_list = [
+        "ReadTimeoutError",
+        "HTTPError",
+        "URL fetch failure",
+    ]
+    for line in txt.split()[::-1]:
+        if any(e in line for e in error_list):
+            print("-------------------------------")
+            print(f"Detect connection error: {line}")
+            return True
+    return False
+
+
 @pytest.mark.parametrize("example_name, example_params", example_test_cases())
 def test_examples(
     tmp_path: Path,
@@ -93,8 +111,20 @@ def test_examples(
     run_cmd_line = f"{python_executable_with_venv} {run_example_py} --name {example_name} --output {metrics_file_path}"
     if data is not None:
         run_cmd_line += f" --data {data}"
-    cmd = Command(run_cmd_line, cwd=PROJECT_ROOT, env=env)
-    cmd.run()
+
+    retry_count = 0
+    while True:
+        cmd = Command(run_cmd_line, cwd=PROJECT_ROOT, env=env)
+        try:
+            ret = cmd.run()
+            if ret == 0:
+                break
+        except Exception as e:
+            if retry_count >= NUM_RETRY_ON_CONNECTION_ERROR or not _is_connection_error(str(e)):
+                raise e
+        retry_count += 1
+        print(f"Retry {retry_count} after {RETRY_TIMEOUT} seconds")
+        time.sleep(RETRY_TIMEOUT)
 
     measured_metrics = load_json(metrics_file_path)
     print(measured_metrics)