@@ -488,103 +488,19 @@ jobs:
488
488
# ARTIFACTS: |
489
489
# test-equinox.log
490
490
# secrets: inherit
491
# TransformerEngine unit tests, delegated to a reusable workflow.
#
# This job defines no steps of its own: it forwards the inputs below and the
# caller's secrets to
# ./.github/workflows/transformer-engine/_unittests_eks.yaml.
#
# A job that calls a reusable workflow must not declare `runs-on`; the runner
# is chosen by the jobs inside the called workflow. The former `runs-on: eks`
# key is therefore intentionally absent.
# NOTE(review): verify that the called workflow itself targets the `eks`
# runner that this job used to request.
test-transformerengine-h100:
  # needs: build-jax
  # Only runs for amd64 builds.
  if: inputs.ARCHITECTURE == 'amd64'
  uses: ./.github/workflows/transformer-engine/_unittests_eks.yaml
  with:
    # NOTE(review): the image is pinned to a fixed tag while the build-jax
    # dependency above is commented out; presumably temporary. Confirm before
    # merging and restore the line below if so.
    # JAX_DOCKER_IMAGE: ${{ needs.build-jax.outputs.DOCKER_TAG_FINAL }}
    JAX_DOCKER_IMAGE: ghcr.io/nvidia/jax-toolbox-internal:13652377029-jax-amd64
    # The run id is part of the job name, so it differs between workflow runs.
    JOB_NAME: transformerengine-${{ github.run_id }}
    S3_BUCKET: jax-toolbox-eks-output
    CI_NAME: transformer-engine
  # Pass all of the caller's secrets through to the called workflow.
  secrets: inherit
588
504
589
505
# te-unittests:
590
506
# secrets: inherit
@@ -753,4 +669,4 @@ jobs:
753
669
# uses: ./.github/workflows/_test_maxtext.yaml
754
670
# with:
755
671
# MAXTEXT_IMAGE: ${{ needs.build-maxtext.outputs.DOCKER_TAG_FINAL }}
756
- # secrets: inherit
672
+ # secrets: inherit
0 commit comments