Skip to content

Commit 9cf2241

Browse files
committed Feb 7, 2025
Initial Unit Test for the Bruteforce CSA utilities

10 files changed

+199
-0
lines changed
 
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
name: IMPROVE UnitTest Bruteforce CSA workflow
2+
on: [push]
3+
jobs:
4+
test:
5+
runs-on: ubuntu-latest
6+
steps:
7+
- uses: actions/checkout@v4
8+
- name: Install and check
9+
run: |
10+
pip install -e .
11+
python -c "import improvelib; print(improvelib.__version__)"
12+
export PYTHONPATH=$PYTHONPATH:.
13+
cd tests
14+
wget --cut-dirs=8 -P ./ -nH -np -m https://web.cels.anl.gov/projects/IMPROVE_FTP/candle/public/improve/benchmarks/single_drug_drp/benchmark-data-pilot1/csa_data/
15+
python test_bruteforce_csa.py

‎.vscode/settings.json

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"python.testing.unittestArgs": [
3+
"-v",
4+
"-s",
5+
"./tests",
6+
"-p",
7+
"test_*.py"
8+
],
9+
"python.testing.pytestEnabled": false,
10+
"python.testing.unittestEnabled": true
11+
}

‎param_log_file.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{'input_dir': './', 'output_dir': './', 'log_level': 'WARNING', 'config_file': None, 'param_log_file': 'param_log_file.txt', 'data_format': '.parquet', 'model_file_name': 'test_model', 'model_file_format': '.pt', 'epochs': 7, 'learning_rate': 7, 'batch_size': 7, 'val_batch': 64, 'loss': 'mse', 'early_stop_metric': 'mse', 'patience': 20, 'metric_type': 'regression', 'y_col_name': 'auc', 'train_test_var': 10, 'split': [0], 'only_cross_study': False, 'study_number': 1, 'train_percent': 0.8, 'variable_name': ''}

‎tests/csa_bruteforce_params.ini

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
[DEFAULT]
2+
input_dir = ./csa_data/raw_data
3+
y_col_name = auc
4+
only_cross_study = False
5+
uses_cuda_name = True
6+
model_name = GraphDRP
7+
model_scripts_dir = ../../GraphDRP
8+
epochs = 5
9+
10+
; # Optional parameters, uncomment if your script requires these
11+
; cuda_name = cuda:7
12+
; input_supp_data_dir = ./author_data
13+
14+
15+
# Full-scale CSA
16+
; output_dir = ./parsl_csa_exp
17+
; source_datasets = ["CCLE","CTRPv2","gCSI","GDSCv1","GDSCv2"]
18+
; target_datasets = ["CCLE","CTRPv2","gCSI","GDSCv1","GDSCv2"]
19+
; split = ["0","1","2","3","4","5","6","7","8","9"]
20+
21+
# Exp 1a
22+
csa_outdir = ./run_csa_full
23+
source_datasets = ["CCLE", "gCSI"]
24+
target_datasets = ["CCLE", "gCSI"]
25+
split_nums = ["0","1","2","3"]
26+
27+
; # Exp 1b
28+
; csa_outdir = ./bruteforce_exp_1b
29+
; source_datasets = ["gCSI", "CCLE"]
30+
; target_datasets = ["gCSI", "CCLE"]
31+
; split_nums = ["0","1","2","3"]
32+
33+
; # Exp 2
34+
; csa_outdir = ./bruteforce_exp_2
35+
; source_datasets = ["CCLE", "gCSI", "GDSCv2"]
36+
; target_datasets = ["CCLE", "gCSI", "GDSCv2"]
37+
; split_nums = ["0","1"]
38+
39+
# Exp full-scale
40+
; csa_outdir = ./bruteforce_exp_fullscale
41+
; source_datasets = ["CCLE", "CTRPv2", "gCSI", "GDSCv1", "GDSCv2"]
42+
; target_datasets = ["CCLE", "CTRPv2", "gCSI", "GDSCv1", "GDSCv2"]
43+
; split_nums = ["0","1","2","3","4","5","6","7","8","9"]
44+
45+
[Preprocess]
46+
47+
[Train]
48+
49+
[Infer]

‎tests/csa_bruteforce_params_def.py

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import sys
2+
from pathlib import Path
3+
4+
from improvelib.utils import str2bool
5+
6+
# Global variables
7+
filepath = Path(__file__).resolve().parent # [Req]
8+
9+
csa_bruteforce_params = [
10+
{"name": "cuda_name",
11+
"type": str,
12+
"default": "cuda:0",
13+
"help": "Cuda device name.",
14+
},
15+
{"name": "csa_outdir",
16+
"type": str,
17+
"default": "./run.csa.full",
18+
"help": "Outdir for workflow.",
19+
},
20+
{"name": "source_datasets",
21+
"nargs" : "+",
22+
"type": str,
23+
"default": ['CCLE'],
24+
"help": "source_datasets for cross study analysis"
25+
},
26+
{"name": "target_datasets",
27+
"nargs" : "+",
28+
"type": str,
29+
"default": ["CCLE", "gCSI"],
30+
"help": "target_datasets for cross study analysis"
31+
},
32+
{"name": "split_nums",
33+
"nargs" : "+",
34+
"type": str,
35+
"default": ['0'],
36+
"help": "Split of the source datasets for CSA"
37+
},
38+
{"name": "only_cross_study",
39+
"type": str2bool,
40+
"default": False,
41+
"help": "If only cross study analysis is needed"
42+
},
43+
{"name": "model_name",
44+
"type": str,
45+
"default": 'graphdrp',  # TODO: decide whether the default should be changed to LGBM
46+
"help": "Name of the deep learning model"
47+
},
48+
{"name": "epochs",
49+
"type": int,
50+
"default": 10,
51+
"help": "Number of epochs"
52+
},
53+
{"name": "uses_cuda_name",
54+
"type": str2bool,
55+
"default": True,
56+
"help": "Change to false if the model doesn't have a cuda_name parameter."
57+
},
58+
{"name": "model_scripts_dir",
59+
"type": str,
60+
"default": './',
61+
"help": "Path to the model repository"
62+
},
63+
64+
]

‎tests/csa_test.py

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
"""
2+
Unit tests for the bruteforce_csa workflow parameters loaded from csa_bruteforce_params.ini.
3+
4+
"""
5+
6+
import unittest
7+
import sys
8+
import os
9+
10+
# Importing params definitions:
11+
from csa_bruteforce_params_def import csa_bruteforce_params
12+
from improvelib.applications.drug_response_prediction.config import DRPPreprocessConfig
13+
from pathlib import Path
14+
filepath = Path(__file__).resolve().parent
15+
16+
csa_bruteforce_params = csa_bruteforce_params
17+
config_file = 'csa_bruteforce_params.ini'
18+
19+
# Load config csa_bruteforce_params.ini
20+
cfg = DRPPreprocessConfig()
21+
params = cfg.initialize_parameters(
22+
pathToModelDir=filepath,
23+
default_config="csa_bruteforce_params.ini",
24+
additional_definitions=csa_bruteforce_params,
25+
required=None
26+
)
27+
28+
class Bruteforce_CSA_test(unittest.TestCase):
29+
def test_params(self):
30+
self.assertEqual(params['cuda_name'], 'cuda:0')
31+
self.assertEqual(params['csa_outdir'], './run_csa_full')
32+
self.assertEqual(params['source_datasets'], ['CCLE', 'gCSI'])
33+
self.assertEqual(params['target_datasets'], ['CCLE', 'gCSI'])
34+
self.assertEqual(params['split_nums'], ["0","1","2","3"])
35+
self.assertEqual(params['only_cross_study'], False)
36+
self.assertEqual(params['model_name'], 'GraphDRP')
37+
self.assertEqual(params['epochs'], 5)
38+
self.assertEqual(params['uses_cuda_name'], True)
39+
# self.assertEqual(params['model_scripts_dir'], './')
40+
41+
if __name__ == '__main__':
42+
unittest.main()

‎tests/param_log_file.txt

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{'input_dir': './csa_data/raw_data', 'output_dir': './', 'log_level': 'WARNING', 'config_file': None, 'param_log_file': 'param_log_file.txt', 'data_format': '.parquet', 'x_data_dir': 'x_data', 'y_data_dir': 'y_data', 'splits_dir': 'splits', 'train_split_file': 'fake', 'val_split_file': 'fake', 'test_split_file': 'fake', 'y_data_files': "[['response.tsv']]", 'x_data_canc_files': "[['cancer_gene_expression.tsv', ['Gene_Symbol']]]", 'x_data_drug_files': "[['drug_SMILES.tsv']]", 'canc_col_name': 'improve_sample_id', 'drug_col_name': 'improve_chem_id', 'y_col_name': 'auc', 'cuda_name': 'cuda:0', 'csa_outdir': './run_csa_full', 'source_datasets': ['CCLE', 'gCSI'], 'target_datasets': ['CCLE', 'gCSI'], 'split_nums': ['0', '1', '2', '3'], 'only_cross_study': False, 'model_name': 'GraphDRP', 'epochs': 5, 'uses_cuda_name': True, 'model_scripts_dir': '../../GraphDRP'}

‎workflows/bruteforce_csa/csa_bruteforce_params_def.py

+6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
1+
import sys
2+
from pathlib import Path
3+
14
from improvelib.utils import str2bool
25

6+
# Global variables
7+
filepath = Path(__file__).resolve().parent # [Req]
8+
39
csa_bruteforce_params = [
410
{"name": "cuda_name",
511
"type": str,
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{'input_dir': './csa_data/raw_data', 'output_dir': './', 'log_level': 'WARNING', 'config_file': None, 'param_log_file': 'param_log_file.txt', 'data_format': '.parquet', 'x_data_dir': 'x_data', 'y_data_dir': 'y_data', 'splits_dir': 'splits', 'train_split_file': 'fake', 'val_split_file': 'fake', 'test_split_file': 'fake', 'y_data_files': "[['response.tsv']]", 'x_data_canc_files': "[['cancer_gene_expression.tsv', ['Gene_Symbol']]]", 'x_data_drug_files': "[['drug_SMILES.tsv']]", 'canc_col_name': 'improve_sample_id', 'drug_col_name': 'improve_chem_id', 'y_col_name': 'auc', 'cuda_name': 'cuda:0', 'csa_outdir': './bruteforce_exp_1a', 'source_datasets': ['CCLE', 'gCSI'], 'target_datasets': ['CCLE', 'gCSI'], 'split_nums': ['0', '1', '2', '3'], 'only_cross_study': False, 'model_name': 'your_model_name', 'epochs': 200, 'uses_cuda_name': False, 'model_scripts_dir': 'path_to_your_model_directory_containing_scripts'}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
[HPO]
2+
model_scripts_dir = ../../../DeepCDR
3+
input_dir = exp_data/
4+
model_name = DeepCDR
5+
model_environment = deepcdr_parsl_env
6+
epochs = 3
7+
output_dir = ./test_DeepCDR
8+
max_evals = 5
9+
interactive_session = True

0 commit comments

Comments
 (0)
Please sign in to comment.