-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfuncrvp_training.smk
executable file
·74 lines (68 loc) · 3.16 KB
/
funcrvp_training.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# --- Configuration ---
# Workflow settings are read from config.yaml and accessed through `config`.
# Keys referenced by the rules in this file: OUTPUT_DIR, STUDY_NAME, TRAITS,
# WANDB_LOGS_DIR, LOGS_DIR, HO_VERSION, OPTUNA_DIR, EMBEDDING, GENOTYPE,
# TEST_SPLIT. TRAITS is optional for rule inputs (read with a [] default).
configfile: "config.yaml"
# --- Rules ---
# --- One Rule to Rule Them All ---
# Default target: requests the two study-level parquet tables, i.e. the same
# paths that compile_results declares as its outputs.
rule all:
    input:
        # Compiled per-gene table for the whole study.
        f'{config["OUTPUT_DIR"]}/{config["STUDY_NAME"]}_genes_extended.pq',
        # Compiled per-phenotype predictions table for the whole study.
        f'{config["OUTPUT_DIR"]}/{config["STUDY_NAME"]}_predictions_extended.pq'
        # Per-trait targets, currently disabled:
        # expand(f"{config['OUTPUT_DIR']}/{{trait}}_mean_betas.pq", trait=config.get("TRAITS", [])),
        # expand(f"{config['OUTPUT_DIR']}/{{trait}}_bayes_pred.pq", trait=config.get("TRAITS", [])),
# --- Train Model ---
# Runs scripts/train_model.py for one trait. The `trait` wildcard is taken from
# the requested output file name, and the job produces two parquet files for
# that trait under config['OUTPUT_DIR']. stdout and stderr of the script are
# redirected to the per-trait log file.
#
# NOTE: inside a shell string the placeholders are expanded with str.format
# semantics, so config keys must be written WITHOUT quotes: {config[KEY]}.
# Writing {config['KEY']} looks up the literal key "'KEY'" and fails.
# The f-strings in output/log are ordinary Python and keep their quoted keys.
rule train_model_trait:
    output:
        result_betas=f"{config['OUTPUT_DIR']}/{{trait}}_mean_betas.pq",
        result_preds=f"{config['OUTPUT_DIR']}/{{trait}}_bayes_pred.pq"
    params:
        epochs=25  # Default parameters
    threads: 16
    resources:
        mem_mb=128000,
        gpu="a40:1",
        exclude="ouga12"
    log:
        f"{config['WANDB_LOGS_DIR']}/{{trait}}_train_model.log"
    shell:
        """
        scripts/train_model.py --trait {wildcards.trait} \
            --epochs {params.epochs} \
            --study_name {config[STUDY_NAME]} \
            --output_dir {config[OUTPUT_DIR]} \
            --hyperparam_version {config[HO_VERSION]} \
            --optuna_dir {config[OPTUNA_DIR]} \
            --logs_dir {config[WANDB_LOGS_DIR]} \
            > {log} 2>&1
        """
# --- Compile Results ---
# Runs scripts/consolidate_results.py once all per-trait outputs exist. It
# takes the *_mean_betas.pq and *_bayes_pred.pq files of every trait listed in
# config TRAITS as inputs and writes the two study-level parquet tables.
# stdout and stderr of the script are redirected to the rule's log file.
#
# NOTE: inside a shell string the placeholders are expanded with str.format
# semantics, so config keys must be written WITHOUT quotes: {config[KEY]}.
# Writing {config['KEY']} looks up the literal key "'KEY'" and fails.
# List-valued placeholders (the two input groups and TRAITS) are rendered as
# whitespace-separated items, each passed to the script as one quoted argument.
# NOTE(review): confirm that consolidate_results.py parses --traits in that
# whitespace-separated form.
rule compile_results:
    input:
        all_beta_results=expand(f"{config['OUTPUT_DIR']}/{{trait}}_mean_betas.pq", trait=config.get("TRAITS", [])),
        all_pred_results=expand(f"{config['OUTPUT_DIR']}/{{trait}}_bayes_pred.pq", trait=config.get("TRAITS", [])),
    output:
        compiled_genes_results=f"{config['OUTPUT_DIR']}/{config['STUDY_NAME']}_genes_extended.pq",
        compiled_pheno_results=f"{config['OUTPUT_DIR']}/{config['STUDY_NAME']}_predictions_extended.pq"
    log:
        f"{config['LOGS_DIR']}/compile_results.log"
    shell:
        """
        scripts/consolidate_results.py --output_genes {output.compiled_genes_results} \
            --output_pheno {output.compiled_pheno_results} \
            --study_name {config[STUDY_NAME]} \
            --embedding {config[EMBEDDING]} \
            --genotype {config[GENOTYPE]} \
            --test_split_size {config[TEST_SPLIT]} \
            --logs_dir {config[LOGS_DIR]} \
            --all_beta_results "{input.all_beta_results}" \
            --all_pred_results "{input.all_pred_results}" \
            --traits '{config[TRAITS]}' \
            > {log} 2>&1
        """
# --- Create Plots ---
# --- Figure 3 ---
# rule create_plot_1:
# input:
# model = f"{config['TRAINING_DIR']}/{{trait}}/model.pkl"  # Update if hyperopt was used: select the best model.
# output:
# plot1=f"{config['PLOTS_DIR']}/plot1.pdf"
# script:
# "scripts/create_plot_1.py"