Rename transformer engine manifest #6

Workflow file for this run

name: ~test TransformerEngine
on:
  workflow_call:
    inputs:
      TE_IMAGE:
        type: string
        description: 'JAX+TE+PAXML image'
        required: true
        default: 'ghcr.io/nvidia/upstream-pax:latest'
      ARTIFACT_PREFIX:
        type: string
        description: 'Name of the artifact zip file'
        required: false
        default: 'te'
jobs:
  # te-multi-gpu:
  #   uses: ./.github/workflows/_test_slurm_pyxis.yaml
  #   strategy:
  #     matrix:
  #       N_GPU: [2, 4, 8]
  #     fail-fast: false
  #   secrets:
  #     SSH_PRIVATE_KEY: ${{ secrets.SSH_PRIVATE_KEY }}
  #     SLURM_LOGIN_USER: ${{ secrets.CLUSTER_LOGIN_USER }}
  #     CONTAINER_REGISTRY_TOKEN: ${{ secrets.github_token }}
  #   with:
  #     NAME: ${{ inputs.ARTIFACT_PREFIX }}-${{ matrix.N_GPU }}GPU
  #     SLURM_LOGIN_HOSTNAME: ${{ vars.HOSTNAME_SLURM_LOGIN }}
  #     OUTPUT_BASEDIR: /nfs/cluster
  #     OUTPUT_MOUNTPOINT: /output
  #     NODES: 1
  #     GPUS_PER_NODE: ${{ matrix.N_GPU }}
  #     NTASKS: 1
  #     NTASKS_PER_NODE: 1
  #     TIME_LIMIT: '00:10:00'
  #     EXTRA_EXPORTS: 'VOCAB_PATH=gs://t5-data/vocabs/cc_all.32000.100extra/sentencepiece.model'
  #     IMAGE: ${{ inputs.TE_IMAGE }}
  #     SRUN_PREAMBLE: |
  #       nvidia-smi
  #       pip install \
  #         pytest \
  #         pytest-reportlog \
  #         cuda-python \
  #         -r ${SRC_PATH_TRANSFORMER_ENGINE}/examples/jax/encoder/requirements.txt
  #     SRUN_SCRIPT: |
  #       set -ex
  #       cd ${SRC_PATH_TRANSFORMER_ENGINE}/examples/jax/encoder
  #       pytest --report-log=/output/pytest-report.jsonl \
  #         test_single_gpu_encoder.py \
  #         test_multigpu_encoder.py \
  #         test_model_parallel_encoder.py
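
# Illustrative sketch only (not part of this run): because this workflow is exposed
# via `workflow_call`, another workflow in the repository could invoke it roughly as
# below, overriding the inputs defined above. The job name `te-test` and the workflow
# path `./.github/workflows/_test_te.yaml` are assumptions for illustration.
#
#   jobs:
#     te-test:
#       uses: ./.github/workflows/_test_te.yaml
#       with:
#         TE_IMAGE: 'ghcr.io/nvidia/upstream-pax:latest'
#         ARTIFACT_PREFIX: 'te'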