Skip to content

Commit cc5ef37

Browse files
Merge branch 'NOAA-EMC:develop' into feature/jedi_atm_ci
2 parents a41a0db + 54daa31 commit cc5ef37

16 files changed

+390
-48
lines changed

Jenkinsfile

+188
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
def Machine = 'none'
2+
def machine = 'none'
3+
def HOME = 'none'
4+
def localworkspace = 'none'
5+
def commonworkspace = 'none'
6+
7+
pipeline {
8+
agent { label 'built-in' }
9+
10+
options {
11+
skipDefaultCheckout()
12+
buildDiscarder(logRotator(numToKeepStr: '2'))
13+
}
14+
15+
stages { // This initial stage is used to get the Machine name from the GitHub labels on the PR
16+
// which is used to designate the Nodes in the Jenkins Controller by the agent label
17+
// Each Jenkins Node is connected to said machine via a Java agent over an SSH tunnel
18+
19+
stage('Get Machine') {
    // Determines which machine this PR should run on by scanning the PR's
    // GitHub labels for "CI-<Machine>-Ready". Sets the pipeline-level
    // variables `localworkspace`, `machine` (lower case) and `Machine`
    // (first letter capitalized).
    agent { label 'built-in' }
    steps {
        script {
            localworkspace = env.WORKSPACE
            machine = 'none'
            for (label in pullRequest.labels) {
                echo "Label: ${label}"
                // If several Ready labels are present, the last one iterated wins
                if ((label.matches("CI-Hera-Ready"))) {
                    machine = 'hera'
                } else if ((label.matches("CI-Orion-Ready"))) {
                    machine = 'orion'
                } else if ((label.matches("CI-Hercules-Ready"))) {
                    machine = 'hercules'
                }
            }
            // Without a matching label the later stages would request an agent
            // labelled "none-emc"; stop here with a clear message instead.
            if (machine == 'none') {
                error("No CI-<Machine>-Ready label (Hera, Orion or Hercules) found on this pull request; nothing to run")
            }
            // Create a second machine variable with the first letter capitalized,
            // because the first letter of the machine name is capitalized in the GitHub labels
            Machine = machine[0].toUpperCase() + machine.substring(1)
        }
    }
}
40+
41+
stage('Get Common Workspace') {
    // Runs on the selected machine's agent: records the shared workspace paths
    // in the pipeline-level variables HOME and commonworkspace, creates the
    // RUNTESTS directory, and flips the PR label from Ready to Building.
    agent { label "${machine}-emc" }
    steps {
        script {
            // NOTE(review): allowedSlaves lists Hera and Orion nodes but no Hercules node,
            // although 'hercules' can be selected in the 'Get Machine' stage -- confirm this is intended.
            properties([
                parameters([
                    [
                        $class: 'NodeParameterDefinition',
                        allowedSlaves: ['built-in', 'Hera-EMC', 'Orion-EMC'],
                        defaultSlaves: ['built-in'],
                        name: '',
                        nodeEligibility: [$class: 'AllNodeEligibility'],
                        triggerIfResult: 'allCases'
                    ]
                ])
            ])

            HOME = "${WORKSPACE}/TESTDIR"
            commonworkspace = "${WORKSPACE}"

            // The exit status of mkdir is requested but deliberately not acted on here
            sh(returnStatus: true, script: "mkdir -p ${HOME}/RUNTESTS")

            pullRequest.addLabel("CI-${Machine}-Building")

            // Only remove the Ready label when it is actually present on the PR
            def ready_label_present = pullRequest.labels.any { it.matches("CI-${Machine}-Ready") }
            if (ready_label_present) {
                pullRequest.removeLabel("CI-${Machine}-Ready")
            }
        }
    }
}
56+
57+
stage('Build System') {
    // Builds each system (matrix axis "system") in its own directory under the
    // common workspace HOME, unless a BUILT_semaphor file shows it was already built.
    matrix {
        agent { label "${machine}-emc" }
        //options {
        //    throttle(['global_matrix_build'])
        //}
        axes {
            axis {
                name "system"
                values "gfs", "gefs"
            }
        }
        stages {
            stage("build system") {
                steps {
                    script {
                        def HOMEgfs = "${HOME}/${system}" // local HOMEgfs is used to build the system on per system basis under the common workspace HOME
                        sh( script: "mkdir -p ${HOMEgfs}", returnStatus: true)
                        ws(HOMEgfs) {
                            env.MACHINE_ID = machine // MACHINE_ID is used in the build scripts to determine the machine and is added to the shell environment
                            if (fileExists("${HOMEgfs}/sorc/BUILT_semaphor")) { // if the system is already built, skip the build in the case of re-runs
                                // Show the semaphore contents in the build log instead of discarding them
                                def built_marker = sh( script: "cat ${HOMEgfs}/sorc/BUILT_semaphor", returnStdout: true).trim() // TODO: add user configurable control to manage build semaphore
                                echo "Found build semaphore: ${built_marker}"
                                ws(commonworkspace) { pullRequest.comment("Cloned PR already built (or build skipped) on ${machine} in directory ${HOMEgfs}") }
                            } else {
                                checkout scm
                                sh( script: "source workflow/gw_setup.sh;which git;git --version;git submodule update --init --recursive", returnStatus: true)
                                def builds_file = readYaml file: "ci/cases/yamls/build.yaml"
                                def build_args_list = builds_file['builds']
                                // 'builds' is a list of single-key maps; indexing it by system name presumably
                                // yields null for the entries of other systems, hence "null" is stripped after joining
                                def build_args = build_args_list[system].join(" ").trim().replaceAll("null", "")
                                dir("${HOMEgfs}/sorc") {
                                    // Abort before writing the semaphore if the build or link step failed;
                                    // otherwise re-runs would skip the build of a broken tree.
                                    def build_status = sh( script: "${build_args}", returnStatus: true)
                                    if (build_status != 0) {
                                        error("Build command '${build_args}' failed with exit status ${build_status} for system ${system} on ${machine}")
                                    }
                                    def link_status = sh( script: "./link_workflow.sh", returnStatus: true)
                                    if (link_status != 0) {
                                        error("link_workflow.sh failed with exit status ${link_status} for system ${system} on ${machine}")
                                    }
                                    sh( script: "echo ${HOMEgfs} > BUILT_semaphor", returnStatus: true)
                                }
                            }
                            if ( pullRequest.labels.any{ value -> value.matches("CI-${Machine}-Building") } ) {
                                pullRequest.removeLabel("CI-${Machine}-Building")
                            }
                            pullRequest.addLabel("CI-${Machine}-Running")
                        }
                    }
                }
            }
        }
    }
}
103+
104+
stage('Run Tests') {
    // For every CI case (matrix axis "Case"): create the experiment, run it,
    // and report the outcome on the pull request through labels and comments.
    matrix {
        agent { label "${machine}-emc" }
        axes {
            axis {
                name "Case"
                values "C48_ATM", "C48_S2SWA_gefs", "C48_S2SW", "C96_atm3DVar" // TODO add dynamic list of cases from env vars (needs additional plugins)
            }
        }
        stages {
            stage('Create Experiment') {
                steps {
                    script {
                        // Drop every line containing "{...}" (the templated entries) so the
                        // remainder of the case file can be read as plain YAML
                        sh( script: "sed -n '/{.*}/!p' ${HOME}/gfs/ci/cases/pr/${Case}.yaml > ${HOME}/gfs/ci/cases/pr/${Case}.yaml.tmp", returnStatus: true)
                        def yaml_case = readYaml file: "${HOME}/gfs/ci/cases/pr/${Case}.yaml.tmp"
                        // NOTE(review): `system` is an undeclared script-level variable shared by all
                        // parallel matrix cells; it is read again in 'Run Experiments' -- confirm this is safe.
                        system = yaml_case.experiment.system
                        def HOMEgfs = "${HOME}/${system}" // local HOMEgfs is used to populate the XML on per system basis
                        env.RUNTESTS = "${HOME}/RUNTESTS"
                        sh( script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh create_experiment ${HOMEgfs}/ci/cases/pr/${Case}.yaml", returnStatus: true)
                    }
                }
            }
            stage('Run Experiments') {
                steps {
                    script {
                        HOMEgfs = "${HOME}/gfs" // common HOMEgfs is used to launch the scripts that run the experiments
                        ws(HOMEgfs) {
                            // Declared locally so parallel matrix cells cannot overwrite each other's pslot
                            def pslot = sh( script: "${HOMEgfs}/ci/scripts/utils/ci_utils_wrapper.sh get_pslot ${HOME}/RUNTESTS ${Case}", returnStdout: true ).trim()
                            pullRequest.comment("**Running experiments: ${Case} on ${Machine}**<br>Built against system **${system}** in directory:<br>`${HOMEgfs}`<br>With the experiment in directory:<br>`${HOME}/RUNTESTS/${pslot}`")
                            try {
                                // No returnStatus here: a non-zero exit must raise so the catch block
                                // below can report the failure instead of falling through to SUCCESS
                                sh( script: "${HOMEgfs}/ci/scripts/run-check_ci.sh ${HOME} ${pslot}")
                            } catch (Exception e) {
                                pullRequest.comment("**FAILURE** running experiments: ${Case} on ${Machine}")
                                error("Failed to run experiments ${Case} on ${Machine}")
                            }
                            pullRequest.comment("**SUCCESS** running experiments: ${Case} on ${Machine}")
                        }
                    }
                }
                post {
                    always {
                        script {
                            ws (HOMEgfs) {
                                // Clear every label that mentions this machine before the final status label is added
                                for (label in pullRequest.labels) {
                                    if (label.contains("${Machine}")) {
                                        pullRequest.removeLabel(label)
                                    }
                                }
                            }
                        }
                    }
                    success {
                        script {
                            ws (HOMEgfs) {
                                pullRequest.addLabel("CI-${Machine}-Passed")
                                def timestamp = new Date().format("MM dd HH:mm:ss", TimeZone.getTimeZone('America/New_York'))
                                pullRequest.comment("**CI SUCCESS** ${Machine} at ${timestamp}\n\nBuilt and ran in directory `${HOME}`")
                            }
                        }
                    }
                    failure {
                        script {
                            ws (HOMEgfs) {
                                pullRequest.addLabel("CI-${Machine}-Failed")
                                def timestamp = new Date().format("MM dd HH:mm:ss", TimeZone.getTimeZone('America/New_York'))
                                pullRequest.comment("**CI FAILED** ${Machine} at ${timestamp}<br>Built and ran in directory `${HOME}`")
                                // Double quotes are required so that ${HOME} is interpolated by Groovy
                                if (fileExists("${HOME}/RUNTESTS/ci.log")) {
                                    def fileContent = readFile "${HOME}/RUNTESTS/ci.log"
                                    // Archive every line of ci.log that mentions a ".log" file
                                    fileContent.eachLine { line ->
                                        if( line.contains(".log")) {
                                            archiveArtifacts artifacts: "${line}", fingerprint: true
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
186+
}
187+
188+
}

ci/cases/pr/C48mx500_3DVarAOWCDA.yaml

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
experiment:
2+
system: gfs
3+
mode: cycled
4+
5+
arguments:
6+
pslot: {{ 'pslot' | getenv }}
7+
app: S2S
8+
resdetatmos: 48
9+
resdetocean: 5.0
10+
comroot: {{ 'RUNTESTS' | getenv }}/COMROOT
11+
expdir: {{ 'RUNTESTS' | getenv }}/EXPDIR
12+
icsdir: {{ 'ICSDIR_ROOT' | getenv }}/C48mx500
13+
idate: 2021032412
14+
edate: 2021032418
15+
nens: 0
16+
gfs_cyc: 0
17+
start: warm
18+
yaml: {{ HOMEgfs }}/ci/cases/yamls/soca_gfs_defaults_ci.yaml
19+
20+
skip_ci_on_hosts:
21+
- orion
22+
- hercules

ci/cases/yamls/build.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
builds:
2+
- gefs: './build_all.sh'
3+
- gfs: './build_all.sh -gu'
+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
defaults:
2+
!INC {{ HOMEgfs }}/parm/config/gfs/yaml/defaults.yaml
3+
base:
4+
ACCOUNT: {{ 'SLURM_ACCOUNT' | getenv }}
5+
DO_JEDIOCNVAR: "YES"

ci/scripts/run-check_ci.sh

+3-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ pslot=${2:-${pslot:-?}} # Name of the experiment being tested by this scr
2121
# │   └── ${pslot}
2222
# └── EXPDIR
2323
# └── ${pslot}
24-
HOMEgfs="${TEST_DIR}/HOMEgfs"
24+
# Two system build directories are created at build time: gfs and gefs
25+
# TODO: Make this configurable (for now all scripts run from gfs for CI at runtime)
26+
HOMEgfs="${TEST_DIR}/gfs"
2527
RUNTESTS="${TEST_DIR}/RUNTESTS"
2628

2729
# Source modules and setup logging

ci/scripts/utils/ci_utils.sh

+110-14
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,120 @@
11
#!/bin/env bash
22

3-
function cancel_slurm_jobs() {
3+
function determine_scheduler() {
  # Print the batch scheduler detected from the available submit command:
  #   "slurm"   when `sbatch` is available (checked first),
  #   "torque"  when `qsub` is available,
  #   "unknown" when neither is found.

  local detected="unknown"

  if command -v sbatch > /dev/null 2>&1; then
    detected="slurm"
  elif command -v qsub > /dev/null 2>&1; then
    detected="torque"
  fi

  echo "${detected}"
}
412

5-
# Usage: cancel_slurm_jobs <substring>
6-
# Example: cancel_slurm_jobs "C48_ATM_3c4e7f74"
13+
function cancel_batch_jobs() {
  # Usage: cancel_batch_jobs <substring>
  # Example: cancel_batch_jobs "C48_ATM_3c4e7f74"
  #
  # Cancel all batch jobs of the current user that have the given substring
  # in their name. So like in the example all jobs with "C48_ATM_3c4e7f74"
  # in their name will be canceled.
  #
  # The substring is used as the right-hand side of a bash `=~` test, so it is
  # matched as a regular expression. The scheduler is taken from
  # determine_scheduler; for anything other than "torque" or "slurm" the
  # function prints a FATAL message and exits with status 1.

  local substring=$1
  local job_ids
  local job_id
  local job_name
  local scheduler

  scheduler=$(determine_scheduler)

  if [[ "${scheduler}" == "torque" ]]; then

    job_ids=$(qstat -u "${USER}" | awk '{print $1}') || true

    for job_id in ${job_ids}; do
      # Entries that are not valid job ids simply yield an empty job name
      job_name=$(qstat -f "${job_id}" | grep Job_Name | awk '{print $3}') || true
      if [[ "${job_name}" =~ ${substring} ]]; then
        echo "Canceling PBS Job ${job_name} with: qdel ${job_id}"
        qdel "${job_id}"
        continue
      fi
    done

  elif [[ "${scheduler}" == "slurm" ]]; then

    job_ids=$(squeue -u "${USER}" -h -o "%i")

    for job_id in ${job_ids}; do
      # Third line of the sacct output, with all whitespace removed, is used as the job name
      job_name=$(sacct -j "${job_id}" --format=JobName%100 | head -3 | tail -1 | sed -r 's/\s+//g') || true
      if [[ "${job_name}" =~ ${substring} ]]; then
        echo "Canceling Slurm Job ${job_name} with: scancel ${job_id}"
        scancel "${job_id}"
        continue
      fi
    done

  else
    echo "FATAL: Unknown/unsupported job scheduler"
    exit 1
  fi
}
56+
57+
58+
function get_pr_case_list () {
  # Print one case name per line for every *.yaml file found in
  # ${HOMEgfs}/ci/cases/pr; a case name is the file name without
  # its directory and without the .yaml suffix.

  local pr_case_dir="${HOMEgfs}/ci/cases/pr"

  for yaml_config in "${pr_case_dir}/"*.yaml; do
    case=$(basename "${yaml_config}" .yaml) || true
    echo "${case}"
  done
}
69+
70+
function get_pslot_list () {
  # Print the name of every entry under <RUNTESTS>/EXPDIR, one per line.
  #   $1 - path to the RUNTESTS directory
  # Each entry name is an experiment pslot (case name plus hash tag).

  local RUNTESTS="${1}"
  local expdir_root="${RUNTESTS}/EXPDIR"

  for pslot_dir in "${expdir_root}/"*; do
    pslot=$(basename "${pslot_dir}") || true
    echo "${pslot}"
  done

}
84+
85+
function get_pslot () {
  # Print the pslot (experiment directory name) that belongs to a case.
  #   $1 - path to the RUNTESTS directory
  #   $2 - case name to look for
  # A pslot matches when its name with the last "_"-separated field (the tag)
  # removed equals the case name. Only the first match is printed; nothing is
  # printed when no entry under <RUNTESTS>/EXPDIR matches.

  local RUNTESTS="${1}"
  local case="${2}"

  for pslot_dir in "${RUNTESTS}/EXPDIR/"*; do
    pslot=$(basename "${pslot_dir}")
    # Strip the trailing "_<tag>" with parameter expansion rather than
    # spawning an echo | rev | cut | rev pipeline for every directory
    check_case="${pslot%_*}"
    if [[ "${check_case}" == "${case}" ]]; then
      echo "${pslot}"
      break
    fi
  done

}
104+
105+
function create_experiment () {
  # Create the experiment described by a CI case YAML file.
  #   $1 - path to the case YAML file
  # Uses HOMEgfs and MACHINE_ID from the calling environment. Exports pslot as
  # "<case>_<short SHA of HEAD in HOMEgfs>", where <case> is the YAML file
  # name without directory and .yaml suffix. Exits with status 1 when HOMEgfs
  # cannot be entered.

  local yaml_config="${1}"

  if ! cd "${HOMEgfs}"; then
    exit 1
  fi

  pr_sha=$(git rev-parse --short HEAD)
  case=$(basename "${yaml_config}" .yaml) || true
  pslot="${case}_${pr_sha}"
  export pslot

  # Platform settings first, then the workflow environment setup
  source "${HOMEgfs}/ci/platforms/config.${MACHINE_ID}"
  source "${HOMEgfs}/workflow/gw_setup.sh"

  # system=$(grep "system:" "${yaml_config}" | cut -d":" -f2 | tr -d " ") || true
  # NOTE(review): `system` is not assigned in this function (the line above is
  # commented out); the path below relies on it coming from the caller's
  # environment or expanding to an empty string -- confirm.

  "${HOMEgfs}/${system}/workflow/create_experiment.py" --overwrite --yaml "${yaml_config}"

}

ci/scripts/utils/ci_utils_wrapper.sh

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/usr/bin/env bash

# Command-line entry point for the functions defined in ci_utils.sh.
# Usage: ci_utils_wrapper.sh <function name> [arguments for the function...]

# HOMEgfs is the directory three levels above the directory holding this script
script_dir="$(dirname "${BASH_SOURCE[0]}")"
HOMEgfs="$(cd "${script_dir}/../../.." >/dev/null 2>&1 && pwd )"
source "${HOMEgfs}/ush/detect_machine.sh"

# First argument names the utility function; all remaining arguments are passed to it
utility_function="${1}"

source "${HOMEgfs}/ci/scripts/utils/ci_utils.sh"
${utility_function} "${@:2}"

0 commit comments

Comments
 (0)