eic · simonge · Jan 8, 2025 · Jan 8, 2025 · Jan 8, 2025 · Jan 8, 2025
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -125,62 +125,64 @@ get_data:
       - runner_system_failure
 
 include: 
-  - local: 'benchmarks/backgrounds/config.yml'
-  - local: 'benchmarks/backwards_ecal/config.yml'
-  - local: 'benchmarks/calo_pid/config.yml'
-  - local: 'benchmarks/ecal_gaps/config.yml'
-  - local: 'benchmarks/tracking_detectors/config.yml'
-  - local: 'benchmarks/tracking_performances/config.yml'
-  - local: 'benchmarks/tracking_performances_dis/config.yml'
-  - local: 'benchmarks/barrel_ecal/config.yml'
-  - local: 'benchmarks/barrel_hcal/config.yml'
-  - local: 'benchmarks/lfhcal/config.yml'
-  - local: 'benchmarks/zdc/config.yml'
-  - local: 'benchmarks/zdc_lyso/config.yml'
-  - local: 'benchmarks/zdc_neutron/config.yml'
-  - local: 'benchmarks/zdc_photon/config.yml'
-  - local: 'benchmarks/zdc_pi0/config.yml'
-  - local: 'benchmarks/material_scan/config.yml'
-  - local: 'benchmarks/pid/config.yml'
-  - local: 'benchmarks/timing/config.yml'
-  - local: 'benchmarks/b0_tracker/config.yml'
-  - local: 'benchmarks/insert_muon/config.yml'
-  - local: 'benchmarks/insert_tau/config.yml'
-  - local: 'benchmarks/zdc_sigma/config.yml'
-  - local: 'benchmarks/zdc_lambda/config.yml'
-  - local: 'benchmarks/insert_neutron/config.yml'
-  - local: 'benchmarks/femc_electron/config.yml'
-  - local: 'benchmarks/femc_photon/config.yml'
-  - local: 'benchmarks/femc_pi0/config.yml'
+  # - local: 'benchmarks/backgrounds/config.yml'
+  # - local: 'benchmarks/backwards_ecal/config.yml'
+  # - local: 'benchmarks/calo_pid/config.yml'
+  # - local: 'benchmarks/ecal_gaps/config.yml'
+  # - local: 'benchmarks/tracking_detectors/config.yml'
+  # - local: 'benchmarks/tracking_performances/config.yml'
+  # - local: 'benchmarks/tracking_performances_dis/config.yml'
+  # - local: 'benchmarks/barrel_ecal/config.yml'
+  # - local: 'benchmarks/barrel_hcal/config.yml'
+  # - local: 'benchmarks/lfhcal/config.yml'
+  # - local: 'benchmarks/zdc/config.yml'
+  # - local: 'benchmarks/zdc_lyso/config.yml'
+  # - local: 'benchmarks/zdc_neutron/config.yml'
+  # - local: 'benchmarks/zdc_photon/config.yml'
+  # - local: 'benchmarks/zdc_pi0/config.yml'
+  # - local: 'benchmarks/material_scan/config.yml'
+  # - local: 'benchmarks/pid/config.yml'
+  # - local: 'benchmarks/timing/config.yml'
+  # - local: 'benchmarks/b0_tracker/config.yml'
+  # - local: 'benchmarks/insert_muon/config.yml'
+  # - local: 'benchmarks/insert_tau/config.yml'
+  # - local: 'benchmarks/zdc_sigma/config.yml'
+  # - local: 'benchmarks/zdc_lambda/config.yml'
+  # - local: 'benchmarks/insert_neutron/config.yml'
+  # - local: 'benchmarks/femc_electron/config.yml'
+  # - local: 'benchmarks/femc_photon/config.yml'
+  # - local: 'benchmarks/femc_pi0/config.yml'
+  - local: 'benchmarks/lowq2/reconstruction_training/config.yml'
 deploy_results:
   allow_failure: true
   stage: deploy
   needs:
-    - "collect_results:backgrounds"
-    - "collect_results:backwards_ecal"
-    - "collect_results:barrel_ecal"
-    - "collect_results:barrel_hcal"
-    - "collect_results:calo_pid"
-    - "collect_results:ecal_gaps"
-    - "collect_results:lfhcal"
-    - "collect_results:material_scan"
-    - "collect_results:pid"
-    - "collect_results:tracking_performance"
-    - "collect_results:tracking_performance_campaigns"
-    - "collect_results:zdc_sigma"
-    - "collect_results:zdc_lambda"
-    - "collect_results:insert_neutron"
-    - "collect_results:tracking_performances_dis"
-    - "collect_results:zdc"
-    - "collect_results:zdc_lyso"
-    - "collect_results:zdc_neutron"
-    - "collect_results:insert_muon"
-    - "collect_results:insert_tau"
-    - "collect_results:zdc_photon"
-    - "collect_results:zdc_pi0"
-    - "collect_results:femc_electron"
-    - "collect_results:femc_photon"
-    - "collect_results:femc_pi0"
+    # - "collect_results:backgrounds"
+    # - "collect_results:backwards_ecal"
+    # - "collect_results:barrel_ecal"
+    # - "collect_results:barrel_hcal"
+    # - "collect_results:calo_pid"
+    # - "collect_results:ecal_gaps"
+    # - "collect_results:lfhcal"
+    # - "collect_results:material_scan"
+    # - "collect_results:pid"
+    # - "collect_results:tracking_performance"
+    # - "collect_results:tracking_performance_campaigns"
+    # - "collect_results:zdc_sigma"
+    # - "collect_results:zdc_lambda"
+    # - "collect_results:insert_neutron"
+    # - "collect_results:tracking_performances_dis"
+    # - "collect_results:zdc"
+    # - "collect_results:zdc_lyso"
+    # - "collect_results:zdc_neutron"
+    # - "collect_results:insert_muon"
+    # - "collect_results:insert_tau"
+    # - "collect_results:zdc_photon"
+    # - "collect_results:zdc_pi0"
+    # - "collect_results:femc_electron"
+    # - "collect_results:femc_photon"
+    # - "collect_results:femc_pi0"
+    - "collect_results:lowq2_reconstruction_training"
   script:
     - snakemake $SNAKEMAKE_FLAGS --cores 1 results/metadata.json
     - find results -print | sort | tee summary.txt

diff --git a/Snakefile b/Snakefile
@@ -30,27 +30,28 @@ def find_epic_libraries():
     return libs
 
 
-include: "benchmarks/backgrounds/Snakefile"
-include: "benchmarks/backwards_ecal/Snakefile"
-include: "benchmarks/barrel_ecal/Snakefile"
-include: "benchmarks/calo_pid/Snakefile"
-include: "benchmarks/ecal_gaps/Snakefile"
-include: "benchmarks/material_scan/Snakefile"
-include: "benchmarks/tracking_performances/Snakefile"
-include: "benchmarks/tracking_performances_dis/Snakefile"
-include: "benchmarks/lfhcal/Snakefile"
-include: "benchmarks/zdc_lyso/Snakefile"
-include: "benchmarks/zdc_neutron/Snakefile"
-include: "benchmarks/insert_muon/Snakefile"
-include: "benchmarks/zdc_lambda/Snakefile"
-include: "benchmarks/zdc_photon/Snakefile"
-include: "benchmarks/zdc_pi0/Snakefile"
-include: "benchmarks/zdc_sigma/Snakefile"
-include: "benchmarks/insert_neutron/Snakefile"
-include: "benchmarks/insert_tau/Snakefile"
-include: "benchmarks/femc_electron/Snakefile"
-include: "benchmarks/femc_photon/Snakefile"
-include: "benchmarks/femc_pi0/Snakefile"
+# include: "benchmarks/backgrounds/Snakefile"
+# include: "benchmarks/backwards_ecal/Snakefile"
+# include: "benchmarks/barrel_ecal/Snakefile"
+# include: "benchmarks/calo_pid/Snakefile"
+# include: "benchmarks/ecal_gaps/Snakefile"
+# include: "benchmarks/material_scan/Snakefile"
+# include: "benchmarks/tracking_performances/Snakefile"
+# include: "benchmarks/tracking_performances_dis/Snakefile"
+# include: "benchmarks/lfhcal/Snakefile"
+# include: "benchmarks/zdc_lyso/Snakefile"
+# include: "benchmarks/zdc_neutron/Snakefile"
+# include: "benchmarks/insert_muon/Snakefile"
+# include: "benchmarks/zdc_lambda/Snakefile"
+# include: "benchmarks/zdc_photon/Snakefile"
+# include: "benchmarks/zdc_pi0/Snakefile"
+# include: "benchmarks/zdc_sigma/Snakefile"
+# include: "benchmarks/insert_neutron/Snakefile"
+# include: "benchmarks/insert_tau/Snakefile"
+# include: "benchmarks/femc_electron/Snakefile"
+# include: "benchmarks/femc_photon/Snakefile"
+# include: "benchmarks/femc_pi0/Snakefile"
+include: "benchmarks/lowq2/reconstruction_training/Snakefile"
 
 use_s3 = config["remote_provider"].lower() == "s3"
 use_xrootd = config["remote_provider"].lower() == "xrootd"

diff --git a/benchmarks/lowq2/reconstruction_training/ProcessData.py b/benchmarks/lowq2/reconstruction_training/ProcessData.py
@@ -0,0 +1,20 @@
+import uproot
+import awkward as ak
+
+def create_arrays(dataFiles):
+    """Load the tagger feature and target tensors of single-track events.
+
+    Args:
+        dataFiles: iterable of ROOT file paths; the ``events`` tree of every
+            file is read and the files are concatenated.
+
+    Returns:
+        Tuple ``(input_data, target_data)`` of awkward arrays holding the
+        ``_TaggerTrackerFeatureTensor_floatData`` and
+        ``_TaggerTrackerTargetTensor_floatData`` branches of the kept events.
+    """
+    shape_branch = "_TaggerTrackerFeatureTensor_shape"
+    feature_branch = "_TaggerTrackerFeatureTensor_floatData"
+    target_branch = "_TaggerTrackerTargetTensor_floatData"
+
+    # Read only the three tensor branches from the "events" tree of each file.
+    tree_paths = [f"{file}:events" for file in dataFiles]
+    events = uproot.concatenate(tree_paths, [shape_branch, feature_branch, target_branch], library="ak")
+
+    # Keep the events whose first shape entry is exactly 1
+    # (presumably the number of tracks, i.e. exactly one track per event).
+    single_track = events[shape_branch][:,0] == 1
+    selected = events[single_track]
+
+    return selected[feature_branch], selected[target_branch]
+
diff --git a/benchmarks/lowq2/reconstruction_training/RegressionModel.py b/benchmarks/lowq2/reconstruction_training/RegressionModel.py
@@ -0,0 +1,103 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import numpy as np
+
+class RegressionModel(nn.Module):
+    """Fully connected regression network with built-in input/output scaling.
+
+    Maps 4 input features to 3 outputs through two tanh hidden layers
+    (512 and 64 units). Inputs are standardised with ``input_mean`` /
+    ``input_std`` before the first layer and the raw network output is
+    rescaled with ``output_std`` / ``output_mean`` after the last one.
+
+    The four scaling tensors are registered as buffers: they are not
+    trainable, but they are part of ``state_dict()`` and follow the module
+    through ``.to(...)`` calls.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.fc1 = nn.Linear(4, 512)
+        self.fc2 = nn.Linear(512, 64)
+        self.fc4 = nn.Linear(64, 3)
+        # Identity scaling until adapt() is called.
+        self.register_buffer("input_mean", torch.zeros(4))
+        self.register_buffer("input_std", torch.ones(4))
+        self.register_buffer("output_mean", torch.zeros(3))
+        self.register_buffer("output_std", torch.ones(3))
+
+    def forward(self, x):
+        """Standardise ``x``, apply the network and rescale the result.
+
+        Args:
+            x: tensor whose last dimension has size 4.
+
+        Returns:
+            Tensor whose last dimension has size 3.
+        """
+        x = (x - self.input_mean) / self.input_std
+        x = torch.tanh(self.fc1(x))
+        x = torch.tanh(self.fc2(x))
+        x = self.fc4(x)
+        return x * self.output_std + self.output_mean
+
+    def _to_buffer(self, values):
+        """Convert statistics to a tensor matching the existing buffers' dtype and device."""
+        return torch.as_tensor(values, dtype=self.input_mean.dtype, device=self.input_mean.device)
+
+    def adapt(self, input_data, output_data):
+        """Set the scaling tensors from the statistics of a data set.
+
+        Args:
+            input_data: array of inputs; mean and standard deviation are
+                taken along axis 0.
+            output_data: array of targets; mean and standard deviation are
+                taken along axis 0.
+        """
+        input_std = self._to_buffer(input_data.std(axis=0))
+        # A constant input column has zero spread; dividing by it in forward()
+        # would give 0/0 = NaN, so fall back to unit scale for such columns.
+        input_std = torch.where(input_std == 0, torch.ones_like(input_std), input_std)
+
+        self.input_mean = self._to_buffer(input_data.mean(axis=0))
+        self.input_std = input_std
+        self.output_mean = self._to_buffer(output_data.mean(axis=0))
+        self.output_std = self._to_buffer(output_data.std(axis=0))
+
+def makeModel():
+    """Build a fresh regression model together with its optimizer and loss.
+
+    Returns:
+        Tuple ``(model, optimizer, criterion)``: a new ``RegressionModel``,
+        an Adam optimizer over its parameters (learning rate 0.0001) and a
+        mean-squared-error loss.
+    """
+    net = RegressionModel()
+    adam = optim.Adam(net.parameters(), lr=0.0001)
+    mse = nn.MSELoss()
+    return net, adam, mse
+
+def trainModel(epochs, train_loader, val_loader):
+    """Train a new regression model and print the losses of every epoch.
+
+    No device placement is performed; the model and the batches are used
+    exactly as the loaders deliver them.
+
+    Args:
+        epochs: number of passes over ``train_loader``.
+        train_loader: loader yielding ``(inputs, targets)`` batches. Its
+            ``dataset`` must expose ``tensors`` with the inputs first and the
+            targets second; they are used to fit the model's scaling before
+            training starts.
+        val_loader: loader yielding ``(inputs, targets)`` validation batches.
+
+    Returns:
+        The trained model.
+    """
+    model, optimizer, criterion = makeModel()
+
+    # Fit the input/output scaling on the complete training set.
+    train_tensors = train_loader.dataset.tensors
+    model.adapt(train_tensors[0].detach().numpy(), train_tensors[1].detach().numpy())
+
+    for epoch in range(epochs):
+        model.train()
+        running_loss = 0.0
+        for inputs, targets in train_loader:
+            optimizer.zero_grad()
+            outputs = model(inputs)
+            loss = criterion(outputs, targets)
+            loss.backward()
+            optimizer.step()
+            # Weight by batch size so the epoch figure is a per-sample mean
+            # even when the last batch is smaller.
+            running_loss += loss.item() * inputs.size(0)
+
+        epoch_loss = running_loss / len(train_loader.dataset)
+
+        # Validation step
+        model.eval()
+        val_loss = 0.0
+        with torch.no_grad():
+            for val_inputs, val_targets in val_loader:
+                val_outputs = model(val_inputs)
+                val_loss += criterion(val_outputs, val_targets).item() * val_inputs.size(0)
+
+        val_loss /= len(val_loader.dataset)
+
+        # Report the epoch-averaged training loss, not the last mini-batch tensor.
+        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss}, Val Loss: {val_loss}")
+
+    return model
diff --git a/benchmarks/lowq2/reconstruction_training/Snakefile b/benchmarks/lowq2/reconstruction_training/Snakefile
@@ -0,0 +1,95 @@
+import os
+
+# Directory prefixes for the rule paths below, taken from the LOWQ2_DATADIR and
+# LOWQ2_RESULTSDIR environment variables. The rules build their paths by plain
+# string concatenation (f"{outputdir}file"), so a non-empty value is normalised
+# to end with a path separator. An unset or empty variable leaves the prefix
+# empty, i.e. paths are relative to the working directory.
+outputdir  = os.path.join(os.getenv("LOWQ2_DATADIR", ""), "")
+resultsdir = os.path.join(os.getenv("LOWQ2_RESULTSDIR", ""), "")
+
+# Runs eicrecon on the remote reconstructed file(s) and writes only the
+# TaggerTrackerFeatureTensor and TaggerTrackerTargetTensor collections into a
+# new local file, which the training, inference and plotting rules take as input.
+rule lowq2_tensor_recon:
+    output:
+        f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
+    # NOTE(review): the shell command below never redirects anything to {log},
+    # so this log file is not written by the command itself.
+    log:
+        f"{outputdir}tensors.eicrecon.tree.edm4eic.root.log",
+    params:
+        # The remote URLs are passed as a parameter rather than as rule inputs, so
+        # Snakemake does not track them and they are handed to eicrecon as-is.
+        # run=range(1,2) and number=range(0,1) expand to a single file.
+        input=expand("root://dtn-eic.jlab.org//work/eic2/EPIC/RECO/24.12.0/epic_craterlake/SIDIS/pythia6-eic/1.0.0/10x100/q2_0to1/pythia_ep_noradcor_10x100_q2_0.000000001_1.0_run{run}.ab.{number:04d}.eicrecon.tree.edm4eic.root", run=range(1,2), number=range(0, 1)),
+        # Geometry description passed to eicrecon via -Pdd4hep:xml_files.
+        compact_xml="epic_lowq2.xml",
+    shell:  
+        """
+        eicrecon {params.input} -Ppodio:output_file={output} \
+        -Ppodio:output_collections=TaggerTrackerFeatureTensor,TaggerTrackerTargetTensor \
+        -Pplugins_to_ignore=janatop,LUMISPECCAL,ECTOF,BTOF,FOFFMTRK,RPOTS,B0TRK,MPGD,ECTRK,DRICH,DIRC,pid,tracking,acts,EEMC,BEMC,FEMC,EHCAL,BHCAL,FHCAL,B0ECAL,ZDC,BTRK,BVTX,PFRICH,richgeo,evaluator,pid_lut,reco,rootfile \
+        -Pdd4hep:xml_files={params.compact_xml} \
+        """
+
+# Trains a regression model to predict the TaggerTrackerTargetTensor from the TaggerTrackerFeatureTensor.
+# The training script receives the tensor file as --dataFiles and the .onnx output path as --outModelFile.
+rule lowq2_reconstruction_training:
+    input:
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
+        # NOTE(review): a relative input path is resolved against the Snakemake
+        # working directory, not this Snakefile's directory - confirm that
+        # TaggerRegression.py is reachable from where the workflow is run.
+        script="TaggerRegression.py",
+    output:
+        f"{outputdir}TestTaggerTrackerTransportation.onnx",
+    shell:
+        """
+        python {input.script} --dataFiles {input.data} --outModelFile {output}
+        """
+
+# Runs the inference with the default model: eicrecon is run on the tensor file
+# without any modelPath override and the feature, target and prediction tensors
+# plus the TaggerTrackerReconstructedParticles collection are written out.
+rule lowq2_reconstruction_inference:
+    input:
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
+    output:
+        f"{outputdir}TaggerInference.eicrecon.tree.edm4eic.root",
+    params:
+        # Geometry description passed to eicrecon via -Pdd4hep:xml_files.
+        compact_xml="epic_lowq2.xml",
+    shell:
+        """
+        eicrecon {input.data} -Ppodio:output_file={output} \
+        -Ppodio:output_collections=TaggerTrackerFeatureTensor,TaggerTrackerTargetTensor,TaggerTrackerPredictionTensor,TaggerTrackerReconstructedParticles \
+        -Pplugins_to_ignore=janatop,LUMISPECCAL,ECTOF,BTOF,FOFFMTRK,RPOTS,B0TRK,MPGD,ECTRK,DRICH,DIRC,pid,tracking,acts,EEMC,BEMC,FEMC,EHCAL,BHCAL,FHCAL,B0ECAL,ZDC,BTRK,BVTX,PFRICH,richgeo,evaluator,pid_lut,reco,rootfile \
+        -Pdd4hep:xml_files={params.compact_xml} \
+        """
+
+# Check inference runs with the new model: same eicrecon invocation as the
+# default-model inference rule, but with the modelPath parameter pointing at the
+# .onnx file produced by lowq2_reconstruction_training and the log level set to error.
+rule lowq2_reconstruction_new_inference:
+    input:
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
+        # Depending on the trained model makes Snakemake run the training rule first.
+        model=f"{outputdir}TestTaggerTrackerTransportation.onnx",
+    output:
+        f"{outputdir}TaggerInferenceNew.eicrecon.tree.edm4eic.root",
+    params:
+        # Geometry description passed to eicrecon via -Pdd4hep:xml_files.
+        compact_xml="epic_lowq2.xml",
+    shell:
+        """
+        eicrecon {input.data} -Ppodio:output_file={output} \
+        -Ppodio:output_collections=TaggerTrackerFeatureTensor,TaggerTrackerTargetTensor,TaggerTrackerPredictionTensor,TaggerTrackerReconstructedParticles \
+        -Pplugins_to_ignore=janatop,LUMISPECCAL,ECTOF,BTOF,FOFFMTRK,RPOTS,B0TRK,MPGD,ECTRK,DRICH,DIRC,pid,tracking,acts,EEMC,BEMC,FEMC,EHCAL,BHCAL,FHCAL,B0ECAL,ZDC,BTRK,BVTX,PFRICH,richgeo,evaluator,pid_lut,reco,rootfile \
+        -Pdd4hep:xml_files={params.compact_xml} \
+        -PLOWQ2:TaggerTrackerTransportationInference:modelPath={input.model} \
+        -Peicrecon:LogLevel=error \
+        """
+
+# Create plots showing the performance of a model located in the data
+# directory ({outputdir}{model}.onnx), e.g. one produced by the training rule.
+# NOTE(review): lowq2_reconstruction_old_plot declares the same output pattern;
+# which rule is used depends on which model input can be satisfied - confirm
+# this is unambiguous for every model name that is requested.
+rule lowq2_reconstruction_new_plot:
+    input:
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
+        # The model path is built from the {model} wildcard of the output directory.
+        model=lambda wildcards: f"{outputdir}{wildcards.model}.onnx",
+    output:
+        # expand(..., model="{model}") re-inserts the wildcard placeholder, so the
+        # output is the directory pattern {resultsdir}{model}.
+        directory(expand(f"{resultsdir}{{model}}", model="{model}")),
+    shell:
+        # NOTE(review): TestModel.py is referenced relative to the working
+        # directory and is not a tracked input - confirm it is found at run time.
+        """
+        mkdir -p {output}
+        python TestModel.py --dataFiles {input.data} --modelFile {input.model} --outDir {output}
+        """
+
+# Create plots showing the performance of an existing model taken from
+# calibrations/onnx/{model}.onnx instead of the data directory.
+# NOTE(review): lowq2_reconstruction_new_plot declares the same output pattern;
+# which rule is used depends on which model input can be satisfied - confirm
+# this is unambiguous for every model name that is requested.
+rule lowq2_reconstruction_old_plot:
+    input:
+        data=f"{outputdir}tensors.eicrecon.tree.edm4eic.root",
+        # The model path is built from the {model} wildcard of the output directory.
+        model=lambda wildcards: f"calibrations/onnx/{wildcards.model}.onnx",
+    output:
+        # expand(..., model="{model}") re-inserts the wildcard placeholder, so the
+        # output is the directory pattern {resultsdir}{model}.
+        directory(expand(f"{resultsdir}{{model}}", model="{model}")),
+    shell:
+        # NOTE(review): TestModel.py is referenced relative to the working
+        # directory and is not a tracked input - confirm it is found at run time.
+        """
+        mkdir -p {output}
+        python TestModel.py --dataFiles {input.data} --modelFile {input.model} --outDir {output}
+        """