Skip to content

Commit cf85814

Browse files
committed
reusable actions test
1 parent 004ed78 commit cf85814

File tree

6 files changed

+127
-56
lines changed

6 files changed

+127
-56
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Composite action: checks out the repository, logs in to ghcr.io with the
# supplied credentials, and creates a Kubernetes secret of type
# kubernetes.io/dockerconfigjson from $HOME/.docker/config.json.
name: Checkout, GHCR login, K8s secret
description: Performs repository checkout, logs into GitHub Container Registry, and stores the token as a Kubernetes secret.

inputs:
  docker-username:
    description: Username for GHCR
    required: true
  docker-password:
    description: Password (e.g., GITHUB_TOKEN)
    required: true
  token-name:
    description: Name of the K8s secret to create
    required: true

runs:
  using: "composite"
  steps:
    - name: Check out the repository
      uses: actions/checkout@v4

    - name: Login to GitHub Container Registry
      uses: docker/login-action@v3
      with:
        registry: "ghcr.io"
        username: ${{ inputs.docker-username }}
        password: ${{ inputs.docker-password }}

    - name: Store GitHub Container Registry token as Kubernetes secret
      shell: bash
      env:
        # Pass the input through the environment instead of expanding
        # ${{ ... }} inside the script, so its value is never parsed as shell.
        K8S_SECRET_NAME: ${{ inputs.token-name }}
      run: |
        kubectl create secret generic \
          "${K8S_SECRET_NAME}" \
          --from-file=.dockerconfigjson="${HOME}/.docker/config.json" \
          --type=kubernetes.io/dockerconfigjson
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Composite action: deletes the Kubernetes secret named by `token-name`.
name: Delete GHCR Token
description: Deletes the K8s secret used for pulling images from GHCR.

inputs:
  token-name:
    description: Name of the K8s secret to delete
    required: true

runs:
  using: "composite"
  steps:
    - name: Delete GitHub Container Registry token
      shell: bash
      # NOTE: this condition only applies once the composite action itself is
      # running; the workflow step that calls this action needs its own
      # `if: always()` for cleanup to happen after an earlier failure.
      if: always()
      env:
        # Pass the input through the environment instead of expanding
        # ${{ ... }} inside the script, so its value is never parsed as shell.
        K8S_SECRET_NAME: ${{ inputs.token-name }}
      run: |
        kubectl delete secret "${K8S_SECRET_NAME}"
+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Composite action: deletes the Kubernetes Job named by `job-name`.
name: Delete K8s Job
description: Cleans up the Job resource to avoid leaving pods behind.

inputs:
  job-name:
    description: The job name to delete
    required: true

runs:
  using: "composite"
  steps:
    - name: Delete Kubernetes job
      # `shell` is mandatory for `run` steps inside a composite action;
      # without it the action fails to load.
      shell: bash
      # NOTE: this condition only applies once the composite action itself is
      # running; the workflow step that calls this action needs its own
      # `if: always()` for cleanup to happen after an earlier failure.
      if: always()
      env:
        # Pass the input through the environment instead of expanding
        # ${{ ... }} inside the script, so its value is never parsed as shell.
        K8S_JOB_NAME: ${{ inputs.job-name }}
      run: |
        kubectl delete job "${K8S_JOB_NAME}"
+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Composite action: applies a Kubernetes Job manifest, polls until no pod of
# the job is in the Pending phase, then follows the job's logs.
name: Submit & Stream K8s Job
description: Submits a Kubernetes job and then streams its logs to GitHub Actions.

inputs:
  job-config-file:
    description: Path to the Kubernetes job YAML
    required: true
  job-name:
    description: The job name
    required: true

runs:
  using: "composite"
  steps:
    - name: Submit Kubernetes job
      shell: bash
      env:
        # Inputs are passed through the environment instead of being expanded
        # inside the script, so their values are never parsed as shell.
        JOB_CONFIG_FILE: ${{ inputs.job-config-file }}
      run: |
        kubectl apply -f "${JOB_CONFIG_FILE}"

    - name: Wait for Kubernetes job to start
      shell: bash
      env:
        K8S_JOB_NAME: ${{ inputs.job-name }}
      run: |
        # Poll every 10 seconds while the selector still returns at least one
        # pod whose phase is "Pending".
        # NOTE(review): the loop also exits immediately when no pod matches the
        # selector yet — confirm that pods always exist by the time this runs.
        while [[ -n $(kubectl get pods --selector="batch.kubernetes.io/job-name=${K8S_JOB_NAME}" --output=jsonpath='{.items[?(@.status.phase == "Pending")].metadata.name}') ]]; do
          echo "Waiting for pods to start..."
          sleep 10
        done

    - name: Stream Kubernetes job output
      shell: bash
      env:
        K8S_JOB_NAME: ${{ inputs.job-name }}
      run: |
        kubectl logs --all-containers=true --all-pods=true --follow "job/${K8S_JOB_NAME}"

.github/container/test-axlearn.sh

+8-23
Original file line numberDiff line numberDiff line change
@@ -117,18 +117,18 @@ echo "Using CUDA devices: $CUDA_VISIBLE_DEVICES"
117117

118118
echo "Running tests..."
119119

120-
# If we are on Kubernetes, install torch
120+
# If we are on Kubernetes, install torch for cpu only
121121
if [ "$K8S" = true ]; then
122-
pip install torch==2.6.0+cpu.cxx11.abi-cp312-cp312-linux_x86_64.whl --index-url https://download.pytorch.org/whl/torch/
123-
#nvidia-cudnn-cu12==9.7.0.66
122+
pip install torch --extra-index-url https://download.pytorch.org/whl/cpu
124123
fi
125124

126125
if [ "${#TEST_FILES[@]}" -eq 0 ]; then
127126
TEST_FILES=("*_test.py")
128127
fi
128+
129129
expanded_test_files=()
130130
for pattern in "${TEST_FILES[@]}"; do
131-
# Use globbing to expand pattern
131+
# retrieve all the files
132132
files=( $pattern )
133133
if [ "${#files[@]}" -gt 0 ]; then
134134
expanded_test_files+=( "${files[@]}" )
@@ -137,19 +137,12 @@ for pattern in "${TEST_FILES[@]}"; do
137137
fi
138138
done
139139

140-
141140
if [ "${#expanded_test_files[@]}" -eq 0 ]; then
142141
echo "No test files found to run."
143142
exit 1
144143
fi
145144

146-
echo "These are the test files:"
147-
for f in "${expanded_test_files[@]}"; do
148-
echo " $f"
149-
done
150-
151-
# Get the directory where the script is located
152-
#SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
145+
# in case we have the exclusion list file
153146
EXCLUDE_LIST_FILE="$DIR/exclusion_list.txt"
154147
EXCLUDE_PATTERNS=()
155148

@@ -159,16 +152,11 @@ if [ -f "$EXCLUDE_LIST_FILE" ]; then
159152
else
160153
echo "Exclusion list file not found at '$EXCLUDE_LIST_FILE'"
161154
fi
162-
echo "Exclude patterns read:"
163-
for pattern in "${EXCLUDE_PATTERNS[@]}"; do
164-
echo "$pattern"
165-
done
166155

167156
final_test_files=()
168157

169158
for test_file in "${expanded_test_files[@]}"; do
170159
exclude=false
171-
#echo $test_file
172160
for pattern in "${EXCLUDE_PATTERNS[@]}"; do
173161
if [[ "$(basename "$test_file")" == "$(basename "$pattern")" ]]; then
174162
exclude=true
@@ -180,7 +168,7 @@ for test_file in "${expanded_test_files[@]}"; do
180168
fi
181169
done
182170

183-
# Initialize counters
171+
# Initialize counters for test
184172
failures=0
185173
passed=0
186174
SUMMARY_FILE="${OUTPUT_DIRECTORY}/summary.txt"
@@ -198,9 +186,9 @@ for test_file in "${final_test_files[@]:0:5}"; do
198186
log_file="${LOG_DIRECTORY}/${log_file_name}"
199187
# run the tests and save them as *.log
200188
pytest "${test_file}" --capture=tee-sys | tee "${log_file}"
201-
# TODO parse the logs?
202189
exit_code=${PIPESTATUS[0]}
203190
echo $exit_code
191+
# write number of tests passed and failed
204192
if [ $exit_code -eq 0 ]; then
205193
echo "${test_file}: PASSED" >> "${SUMMARY_FILE}"
206194
((passed++))
@@ -209,7 +197,4 @@ for test_file in "${final_test_files[@]:0:5}"; do
209197
((failures++))
210198
fi
211199
echo ""
212-
done
213-
214-
echo $passed
215-
echo $failures
200+
done

.github/workflows/_ci.yaml

+23-33
Original file line numberDiff line numberDiff line change
@@ -719,35 +719,25 @@ jobs:
719719
# test-backend-independent.log
720720
# secrets: inherit
721721

722-
# TODO WE CAN CREATE A RESUABLE ACTION HERE
723-
# FIX everything with env.something
722+
724723
test-axlearn-eks:
725724
needs: build-axlearn
726725
if: inputs.ARCHITECTURE == 'amd64'
727726
runs-on: eks
728727
env:
729728
AXLEARN_DOCKER_IMAGE: ${{ needs.build-axlearn.outputs.DOCKER_TAG_FINAL }}
730-
JOB_NAME: ${{ github.run_id }}-${{ github.run_attempt }}-axlearn
731-
POSTPROCESS_JOB_NAME: ${{ github.run_id }}-${{ github.run_attempt }}-axlearn-postprocess
732-
TOKEN_NAME: ${{ github.run_id }}-${{ github.run_attempt }}-token
729+
JOB_NAME: axlearn-${{ github.run_id }}
730+
TOKEN_NAME: axlearn-${{ github.run_id }}-token
733731
steps:
734-
- name: Check out the repository
735-
uses: actions/checkout@v4
736-
- name: Login to GitHub Container Registry
737-
uses: docker/login-action@v3
738-
with:
739-
registry: ghcr.io
740-
username: ${{ github.repository_owner }}
741-
password: ${{ secrets.GITHUB_TOKEN }}
742-
- name: Store GitHub Container Registry token as Kubernetes secret
743-
run: |
744-
kubectl create secret generic \
745-
${{ env.TOKEN_NAME }} \
746-
--from-file=.dockerconfigjson=$HOME/.docker/config.json \
747-
--type=kubernetes.io/dockerconfigjson
748-
- name: Set date environment variable
732+
- name: Set date env var for saving files
749733
run: |
750734
echo "DATE_TEST_RAN=$(date +'%Y-%m-%d-%H-%M-%S')" >> $GITHUB_ENV
735+
- name: Check and GHCR Login
736+
uses: ./.github/actions/checkout-ghcr-login
737+
with:
738+
docker-username: ${{ github.repository_owner }}
739+
docker-password: ${{ secrets.GITHUB_TOKEN }}
740+
token-name: ${{ env.TOKEN_NAME }}
751741
- name: Configure axlearn test job
752742
run: |
753743
# Replace placeholders in axlearn-job.yml with environment variables
@@ -758,18 +748,17 @@ jobs:
758748
| select(di == 0).spec.template.spec.imagePullSecrets[].name = strenv(TOKEN_NAME)' \
759749
.github/eks-workflow-files/axlearn/axlearn-job.yml
760750
git diff .github/eks-workflow-files/axlearn/axlearn-job.yml
761-
- name: Submit axlearn test job
762-
run: kubectl apply -f .github/eks-workflow-files/axlearn/axlearn-job.yml
763-
- name: Wait for axlearn test job to start
764-
run: |
765-
while [[ -n $(kubectl get pods --selector=batch.kubernetes.io/job-name=${{ env.JOB_NAME }} --output=jsonpath='{.items[?(@.status.phase == "Pending")].metadata.name}') ]]; do
766-
sleep 10
767-
done
768-
- name: Stream axlearn test job output
769-
run: kubectl logs --all-containers=true --all-pods=true --follow job/${{ env.JOB_NAME }}
751+
- name: Submit & wait for axlearn test job
752+
uses: ./.github/actions/submit-k8s-job
753+
with:
754+
job-config-file: ".github/eks-workflow-files/axlearn/axlearn-job.yml"
755+
job-name: ${{ env.JOB_NAME }}
756+
770757
- name: Delete axlearn test job
771-
if: always()
772-
run: kubectl delete job ${{ env.JOB_NAME }}
758+
if: always()
uses: ./.github/actions/delete-k8s-job
759+
with:
760+
job-name: ${{ env.JOB_NAME }}
761+
773762
- name: Download logs from S3
774763
run: |
775764
mkdir -p /tmp/axlearn-output
@@ -831,6 +820,7 @@ jobs:
831820
"badge-axlearn-test"
832821
summary.txt
833822
- name: Delete GitHub Container Registry token
834-
if: always()
835-
run: kubectl delete secret ${{ env.TOKEN }}
823+
if: always()
uses: ./.github/actions/delete-ghcr-token
824+
with:
825+
token-name: ${{ env.TOKEN_NAME }}
836826

0 commit comments

Comments
 (0)