Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
18eb70a
make use of find_kw for Ne
vedina Nov 3, 2025
fbaa6ab
tags should not be hard coded
vedina Nov 3, 2025
f9b0f28
print find_kw options
vedina Nov 3, 2025
7160472
better release handling
vedina Nov 4, 2025
c20bbbd
plot calcie as well
vedina Nov 4, 2025
52deec2
some entries may not have SRM
vedina Nov 4, 2025
3a9917d
proper check for release processed folder
vedina Nov 5, 2025
4e587ef
added test task for Ne peak fitting and matching
vedina Nov 21, 2025
ca4facf
with cost matrix vis
vedina Nov 21, 2025
67d194d
with units, option and intensity for matching peaks
vedina Nov 22, 2025
4353c57
looks like a solution
vedina Nov 22, 2025
8ca468f
xcalibration updates
vedina Nov 28, 2025
090ad64
optional test_offset to test the calibration procedure with artificia…
vedina Jan 11, 2026
5ebffd2
correct shift and clip
vedina Jan 11, 2026
04f5d1f
trim_axes
vedina Jan 11, 2026
a96ec4b
quantile match
vedina Jan 12, 2026
eda347e
clip edge case handling
vedina Jan 13, 2026
62dae1d
match diagnostics
vedina Jan 13, 2026
c4a5bf1
logging
vedina Jan 14, 2026
0ab91f7
matched_peaks_analysis
vedina Jan 14, 2026
05783f0
comparison of Ne peaks before/after calibration
vedina Jan 14, 2026
db71810
matched peaks analysis
vedina Jan 14, 2026
2df56dc
calibration analysis in a separate task
vedina Jan 15, 2026
b8870f7
samples calibration analysis by matched peaks
vedina Jan 15, 2026
d31e72a
units
vedina Jan 15, 2026
d6830f4
analysis with external file
vedina Jan 15, 2026
a658a16
visualisation fixes
vedina Jan 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,6 @@ dependencies = [
"tables~=3.9.2",
]

[tool.uv.sources]
ramanchada2 = { path = "../ramanchada2", editable = true }

78 changes: 78 additions & 0 deletions src/calibration_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import pandas as pd
from matched_peaks_analysis import (
analyze_peak_matching_quality,
compare_before_after_calibration,
analyze_systematic_vs_random_errors,
plot_calibration_analysis
)
from IPython.display import display
import matplotlib.pyplot as plt
from utils import (
toc, toc_anchor, toc_entry, toc_link, toc_heading, toc_collapsible,
get_config_units, init_logging
)
import traceback
from pathlib import Path


# + tags=["parameters"]
product = None
upstream = None
exclude = "P6_0301"
context = ""
sample_peaks = None
# -

logger = init_logging(Path(product["nb"]).parent , f"calibration_analysis.log")


toc_heading(f"Calibration analysis","h1")

toc_collapsible("Details", context)

matched_peaks = None
for key in upstream["spectracal_*"].keys():
matched_peaks_file = upstream["spectracal_*"][key]["matched_peaks"]
_matched_peaks = pd.read_csv(matched_peaks_file)
if matched_peaks is None:
matched_peaks = _matched_peaks
else:
matched_peaks = pd.concat([matched_peaks, _matched_peaks])


matched_peaks_file = upstream["calibration_verify_xy"]["matched_peaks"]
#matched_peaks_file = sample_peaks
_matched_peaks = pd.read_csv(matched_peaks_file)
matched_peaks = _matched_peaks if matched_peaks is None else pd.concat([matched_peaks, _matched_peaks])


try:
# Run matched peak analyses
mp_filtered = matched_peaks.loc[matched_peaks["key"] != exclude]
samples = matched_peaks["sample"].unique()
for sample in samples:
units = "nm" if sample == "Ne" else "cm-1"
mp = mp_filtered.loc[mp_filtered["sample"] == sample]
toc_heading(f"{sample} peak calibration analysis","h2")
toc_heading(f"Analyze {sample} peak matching quality","h3")
summary = analyze_peak_matching_quality(mp, units=units)
toc_collapsible("Table", summary._repr_html_())
toc_heading(f"Compare {sample} peaks before/after calibration","h3")
comparison = compare_before_after_calibration(mp, units=units)
toc_collapsible("Table", comparison._repr_html_())
toc_heading(f"Analyze {sample} peaks systematic vs random errors","h3")
systematic_analysis = analyze_systematic_vs_random_errors(mp, units=units)
toc_collapsible("Table", systematic_analysis._repr_html_())
# Visualize
toc_heading(f"Visualisation","h3")
try:
fig = plot_calibration_analysis(
mp, units=units, output_path=product["analysis"])
plt.show()
except Exception as err:
logger.error(err)
traceback.print_exc()

matched_peaks.to_csv(product["matched_peaks"], index=False)
except Exception as err:
traceback.print_exc()
115 changes: 94 additions & 21 deletions src/calibration_verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from IPython.display import display
import matplotlib.pyplot as plt
from ramanchada2.protocols.calibration.calibration_model import CalibrationModel
from ramanchada2.protocols.calibration.xcalibration import match_peaks4analysis
from utils import (find_peaks, plot_si_peak, load_config, unicode_unit,
get_config_findkw, plot_biclustering, plot_spectra_heatmaps)
from sklearn.metrics.pairwise import cosine_similarity
Expand All @@ -11,8 +12,12 @@
import traceback
import pickle
from utils import (
toc, toc_anchor, toc_entry, toc_link, toc_heading, toc_collapsible
toc, toc_anchor, toc_entry, toc_link, toc_heading, toc_collapsible,
get_config_units, init_logging
)
import glob
from pathlib import Path



# + tags=["parameters"]
Expand All @@ -25,14 +30,34 @@
pst_tag = None
calcite_tag = None
mode = None
match_mode = None
interpolator = None
# -

logger = init_logging(Path(product["nb"]).parent , f"calibration_verify_{mode}.log")
_config = load_config(os.path.join(config_root, config_templates))
warnings.filterwarnings('ignore')

def plot_model(calmodel, entry, laser_wl, optical_path, spe_sils=None):

def get_reference_peaks(tag):
print(tag)
_lookup_cm1 = {
"Si": {520.45 : 1},
"S0B": {520.45 : 1},
"S0P": {520.45 : 1},
"S0N": {520.45 : 1},
"S1N": {520.45 : 1},
#"PST": {620.9:16, 795.8:10, 1001.4:100, 1031.8:27, 1155.3:13, 1450.5:8, 1583.1:12, 1602.3:28, 2852.4:9, 2904.5:13},
"CAL": {155.21:1, 281.26:1, 711.95:1, 1085.91:1, 1435.22:1, 1748.91:1}
}
return _lookup_cm1.get(tag, None)

def get_profile(tag):
    """Peak-fitting profile for a reference sample tag.

    The silicon-type tags are fitted with a Pearson4 profile; every
    other tag uses a Gaussian.
    """
    pearson_tags = {"Si", "S0B", "S0N"}
    if tag in pearson_tags:
        return "Pearson4"
    return "Gaussian"

def plot_model(calmodel, entry, laser_wl, optical_path, spe_sils=None, spe_units=None):
fig, (ax, ax1, ax2) = plt.subplots(1, 3, figsize=(15, 3))
# print(modelfile, tags)
logger.debug(f"{modelfile} {tags}")
calmodel.components[0].model.plot(ax=ax)
fig.suptitle(f"[{entry}] {laser_wl}nm {optical_path}")
calmodel.plot(ax=ax1)
Expand All @@ -41,7 +66,7 @@ def plot_model(calmodel, entry, laser_wl, optical_path, spe_sils=None):
ax1.axvline(x=si_peak, color='black', linestyle='--', linewidth=2, label="Si peak {:.3f} nm".format(si_peak))
if spe_sils is not None:
for spe_sil in spe_sils:
sil_calibrated = calmodel.apply_calibration_x(spe_sil)
sil_calibrated = calmodel.apply_calibration_x(spe_sil, spe_units=spe_units )
try:
fitres, cand = find_peaks(sil_calibrated,
profile="Pearson4",
Expand All @@ -50,7 +75,7 @@ def plot_model(calmodel, entry, laser_wl, optical_path, spe_sils=None):
)
except Exception as err:
fitres = None
print(err)
logger.error(err)
plot_si_peak(spe_sil, sil_calibrated, fitres=fitres, ax=ax2)


Expand Down Expand Up @@ -95,9 +120,13 @@ def plot_distances(pairwise_distances, identifiers):

original = {}
calibrated = {}

matched_peaks = None
for key in upstream["spectracal_*"].keys():

entry = key.replace("spectracal_","")
key_frame = key.replace("spectracal","spectraframe")

data_file = upstream["spectraframe_*"][key_frame]["h5"]
spectra_frame = pd.read_hdf(data_file, key="templates_read")
df_bkg_substracted = spectra_frame.loc[spectra_frame["background"] == "BACKGROUND_SUBTRACTED"]
Expand All @@ -107,22 +136,33 @@ def plot_distances(pairwise_distances, identifiers):
for modelfile in pkl_files:
tags = os.path.basename(modelfile).replace(".pkl", "").split("_")
optical_path = tags[2]
laser_wl = int(tags[1])
laser_wl = int(tags[1])
calmodel = CalibrationModel.from_file(os.path.join(folder_path, modelfile))

if mode == "xy":
with open(os.path.join(folder_path_ycal, f"ycalmodel_{laser_wl}_{optical_path}.pkl"), "rb") as f:
ycalmodel = pickle.load(f)
pattern = os.path.join(folder_path_ycal, f"ycalmodel_{laser_wl}_{optical_path}_*.pkl")
pkl_files = glob.glob(pattern)
ycalmodels = []
for pkl_file in pkl_files:
logger.debug(pkl_file)
with open(pkl_file, "rb") as f:
ycalmodels.append(pickle.load(f))
logger.debug(f"[{key}] Number of relative intensity calibration models: {len(ycalmodels)}")
if len(ycalmodels) == 0:
continue
else:
ycalmodel = None
ycalmodels = None

op_data = df_bkg_substracted.loc[df_bkg_substracted["optical_path"] == optical_path]
spe_sum = None
spe_sil = average_spe(op_data, si_tag).trim_axes(method='x-axis',
boundaries=(520.45-50, 520.45+50))
spe_sil = average_spe(op_data, si_tag)
if spe_sil is None:
continue
plot_model(calmodel, entry, laser_wl, optical_path, [spe_sil])
si_units = get_config_units(_config, key, tag="si")
if si_units == "cm-1":
spe_sil = spe_sil.trim_axes(method='x-axis', boundaries=(520.45-50, 520.45+50))

plot_model(calmodel, entry, laser_wl, optical_path, [spe_sil],spe_units=si_units )
fig, (ax, ax1, ax2, ax3) = plt.subplots(1, 4, figsize=(15, 3))
_id = f"[{entry}] {laser_wl}nm {optical_path}"
fig.suptitle(_id)
Expand All @@ -149,17 +189,18 @@ def plot_distances(pairwise_distances, identifiers):
else:
spe = spe.trim_axes(method='x-axis', boundaries=boundaries)

#print(spe.y_noise_MAD())
logger.debug(f"{entry} {laser_wl} {optical_path} {tag} spe.y_noise_MAD() {spe.y_noise_MAD()}")
# remove pedestal
spe.y = spe.y - np.min(spe.y)
# remove baseline
spe = spe.subtract_baseline_rc1_snip(niter=40)
# x calibration
spe_xcalibrated = calmodel.apply_calibration_x(spe)
# y calibration
if ycalmodel is None:
if ycalmodels is None:
spe_calibrated = spe_xcalibrated
else:
ycalmodel = ycalmodels[0]
spe_xcalibrated = spe_xcalibrated.trim_axes(method="x-axis", boundaries=ycalmodel.ref.raman_shift)
spe_calibrated = ycalmodel.process(spe_xcalibrated)

Expand All @@ -172,6 +213,37 @@ def plot_distances(pairwise_distances, identifiers):
x_range=boundaries, xnew_bins=bins, spline=spline)
#spe_cal_resampled = spe_cal_resampled.subtract_baseline_rc1_snip(niter=40).normalize(strategy=strategy)

if ycalmodels is not None:
spe_xcal_resampled = spe_xcalibrated.resample_spline_filter(
x_range=boundaries, xnew_bins=bins, spline=spline)
_spectra = [spe, spe_xcalibrated, spe_calibrated]
_stages = ["1.original","2.x-clbr","3.y-clbr"]
else:
_spectra = [spe, spe_calibrated]
_stages = ["1.original","2.x-clbr"]
_refs = get_reference_peaks(tag)
if _refs is not None:
logger.info(f"{entry} {laser_wl} {optical_path} match_peaks4analysis {tag}")
df_calib = match_peaks4analysis(
spectra =_spectra,
ref=_refs,
spe_units="cm-1",
find_kw={},
fit_peaks_kw={},
profile=get_profile(tag),
should_fit=True,
match_method = match_mode,
stages=_stages
)
else:
df_calib = None

if df_calib is not None:
df_calib["key"] = entry
df_calib["sample"] = tag
df_calib["optical_path"] = optical_path
df_calib["laser_wl"] = laser_wl
matched_peaks = df_calib if matched_peaks is None else pd.concat([matched_peaks, df_calib])

if plot_resampled:
spe_resampled.plot(ax=axis, label=tag)
Expand All @@ -198,6 +270,8 @@ def plot_distances(pairwise_distances, identifiers):
traceback.print_exc()
axis.grid()

matched_peaks.to_csv(product["matched_peaks"], index=False)

labels = ["original", f"{mode}-calibrated"]

toc_heading(f"Comparison of spectra before and after {mode}-calibration","h2")
Expand All @@ -220,8 +294,7 @@ def plot_distances(pairwise_distances, identifiers):
toc_heading(tag, "h2")
id_calibrated = calibrated[tag]["id"]
if len(id_calibrated) <= 1:
print(f"{tag}: At least 2 optical paths needed to compare calibration results, "
f"found {len(id_calibrated)}: {id_calibrated}")
logger.warning(f"{tag}: At least 2 optical paths needed to compare calibration results, found {len(id_calibrated)}: {id_calibrated}")

y_original = np.array(original[tag]["y"])
y_calibrated = np.array(calibrated[tag]["y"])
Expand All @@ -230,8 +303,7 @@ def plot_distances(pairwise_distances, identifiers):
wavelength = original[tag].get("x", np.arange(y_original.shape[1])) # assume x-values stored here

if len(y_original) != len(y_calibrated):
print(f"Warning: {tag} - different number of spectra "
f"(original={len(y_original)}, calibrated={len(y_calibrated)})")
logger.warning(f"Warning: {tag} - different number of spectra (original={len(y_original)}, calibrated={len(y_calibrated)})")

ids = [id_original, id_calibrated]

Expand Down Expand Up @@ -271,10 +343,10 @@ def plot_distances(pairwise_distances, identifiers):
title=labels[index], ax=ax_biclust)
fig.tight_layout(rect=[0, 0, 1, 0.95]) # leave space for title
except Exception as e:
print(f"Biclustering plot failed for {tag} ({labels[index]}): {e}")
logger.error(f"Biclustering plot failed for {tag} ({labels[index]}): {e}")

except Exception as e:
print(f"Error computing cosine similarity for tag {tag} ({labels[index]}): {e}")
logger.error(f"Error computing cosine similarity for tag {tag} ({labels[index]}): {e}")
traceback.print_exc()

toc_heading("Spectra overlay plot", "h3")
Expand Down Expand Up @@ -322,4 +394,5 @@ def plot_distances(pairwise_distances, identifiers):
plt.show()

except Exception as e:
print(f"Failed to plot spectra for {tag}: {e}")
logger.error(f"Failed to plot spectra for {tag}: {e}")

Loading
Loading