Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
18eb70a
make use of find_kw for Ne
vedina Nov 3, 2025
fbaa6ab
tags should not be hard coded
vedina Nov 3, 2025
f9b0f28
print find_kw options
vedina Nov 3, 2025
7160472
better release handling
vedina Nov 4, 2025
c20bbbd
plot calcie as well
vedina Nov 4, 2025
52deec2
some entries may not have SRM
vedina Nov 4, 2025
3a9917d
proper check for release processed folder
vedina Nov 5, 2025
4e587ef
added test task for Ne peak fitting and matching
vedina Nov 21, 2025
ca4facf
with cost matrix vis
vedina Nov 21, 2025
67d194d
with units, option and intensity for matching peaks
vedina Nov 22, 2025
4353c57
looks like a solution
vedina Nov 22, 2025
8ca468f
xcalibration updates
vedina Nov 28, 2025
090ad64
optional test_offset to test the calibration procedure with artificia…
vedina Jan 11, 2026
5ebffd2
correct shift and clip
vedina Jan 11, 2026
04f5d1f
trim_axes
vedina Jan 11, 2026
a96ec4b
quantile match
vedina Jan 12, 2026
eda347e
clip edge case handling
vedina Jan 13, 2026
62dae1d
match diagnostics
vedina Jan 13, 2026
c4a5bf1
logging
vedina Jan 14, 2026
0ab91f7
matched_peaks_analysis
vedina Jan 14, 2026
05783f0
comparison of Ne peaks before/after calibration
vedina Jan 14, 2026
db71810
matched peaks analysis
vedina Jan 14, 2026
2df56dc
calibration analysis in a separate task
vedina Jan 15, 2026
b8870f7
samples calibration analysis by matched peaks
vedina Jan 15, 2026
d31e72a
units
vedina Jan 15, 2026
d6830f4
analysis with external file
vedina Jan 15, 2026
a658a16
visualisation fixes
vedina Jan 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,6 @@ dependencies = [
"tables~=3.9.2",
]

[tool.uv.sources]
ramanchada2 = { path = "../ramanchada2", editable = true }

78 changes: 78 additions & 0 deletions src/calibration_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import pandas as pd
from matched_peaks_analysis import (
analyze_peak_matching_quality,
compare_before_after_calibration,
analyze_systematic_vs_random_errors,
plot_calibration_analysis
)
from IPython.display import display
import matplotlib.pyplot as plt
from utils import (
toc, toc_anchor, toc_entry, toc_link, toc_heading, toc_collapsible,
get_config_units, init_logging
)
import traceback
from pathlib import Path


# + tags=["parameters"]
product = None
upstream = None
exclude = "P6_0301"
context = ""
sample_peaks = None
# -

logger = init_logging(Path(product["nb"]).parent , f"calibration_analysis.log")


toc_heading(f"Calibration analysis","h1")

toc_collapsible("Details", context)

matched_peaks = None
for key in upstream["spectracal_*"].keys():
matched_peaks_file = upstream["spectracal_*"][key]["matched_peaks"]
_matched_peaks = pd.read_csv(matched_peaks_file)
if matched_peaks is None:
matched_peaks = _matched_peaks
else:
matched_peaks = pd.concat([matched_peaks, _matched_peaks])


matched_peaks_file = upstream["calibration_verify_xy"]["matched_peaks"]
#matched_peaks_file = sample_peaks
_matched_peaks = pd.read_csv(matched_peaks_file)
matched_peaks = _matched_peaks if matched_peaks is None else pd.concat([matched_peaks, _matched_peaks])


try:
# Run matched peak analyses
mp_filtered = matched_peaks.loc[matched_peaks["key"] != exclude]
samples = matched_peaks["sample"].unique()
for sample in samples:
units = "nm" if sample == "Ne" else "cm-1"
mp = mp_filtered.loc[mp_filtered["sample"] == sample]
toc_heading(f"{sample} peak calibration analysis","h2")
toc_heading(f"Analyze {sample} peak matching quality","h3")
summary = analyze_peak_matching_quality(mp, units=units)
toc_collapsible("Table", summary._repr_html_())
toc_heading(f"Compare {sample} peaks before/after calibration","h3")
comparison = compare_before_after_calibration(mp, units=units)
toc_collapsible("Table", comparison._repr_html_())
toc_heading(f"Analyze {sample} peaks systematic vs random errors","h3")
systematic_analysis = analyze_systematic_vs_random_errors(mp, units=units)
toc_collapsible("Table", systematic_analysis._repr_html_())
# Visualize
toc_heading(f"Visualisation","h3")
try:
fig = plot_calibration_analysis(
mp, units=units, output_path=product["analysis"])
plt.show()
except Exception as err:
logger.error(err)
traceback.print_exc()

matched_peaks.to_csv(product["matched_peaks"], index=False)
except Exception as err:
traceback.print_exc()
115 changes: 94 additions & 21 deletions src/calibration_verify.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from IPython.display import display
import matplotlib.pyplot as plt
from ramanchada2.protocols.calibration.calibration_model import CalibrationModel
from ramanchada2.protocols.calibration.xcalibration import match_peaks4analysis
from utils import (find_peaks, plot_si_peak, load_config, unicode_unit,
get_config_findkw, plot_biclustering, plot_spectra_heatmaps)
from sklearn.metrics.pairwise import cosine_similarity
Expand All @@ -11,8 +12,12 @@
import traceback
import pickle
from utils import (
toc, toc_anchor, toc_entry, toc_link, toc_heading, toc_collapsible
toc, toc_anchor, toc_entry, toc_link, toc_heading, toc_collapsible,
get_config_units, init_logging
)
import glob
from pathlib import Path



# + tags=["parameters"]
Expand All @@ -25,14 +30,34 @@
pst_tag = None
calcite_tag = None
mode = None
match_mode = None
interpolator = None
# -

logger = init_logging(Path(product["nb"]).parent , f"calibration_verify_{mode}.log")
_config = load_config(os.path.join(config_root, config_templates))
warnings.filterwarnings('ignore')

def plot_model(calmodel, entry, laser_wl, optical_path, spe_sils=None):

def get_reference_peaks(tag):
print(tag)
_lookup_cm1 = {
"Si": {520.45 : 1},
"S0B": {520.45 : 1},
"S0P": {520.45 : 1},
"S0N": {520.45 : 1},
"S1N": {520.45 : 1},
#"PST": {620.9:16, 795.8:10, 1001.4:100, 1031.8:27, 1155.3:13, 1450.5:8, 1583.1:12, 1602.3:28, 2852.4:9, 2904.5:13},
"CAL": {155.21:1, 281.26:1, 711.95:1, 1085.91:1, 1435.22:1, 1748.91:1}
}
return _lookup_cm1.get(tag, None)

def get_profile(tag):
    """Peak-fitting profile for a reference sample tag.

    The silicon-type tags are fitted with a Pearson4 profile; every
    other tag uses a Gaussian.
    """
    pearson_tags = {"Si", "S0B", "S0N"}
    if tag in pearson_tags:
        return "Pearson4"
    return "Gaussian"

def plot_model(calmodel, entry, laser_wl, optical_path, spe_sils=None, spe_units=None):
fig, (ax, ax1, ax2) = plt.subplots(1, 3, figsize=(15, 3))
# print(modelfile, tags)
logger.debug(f"{modelfile} {tags}")
calmodel.components[0].model.plot(ax=ax)
fig.suptitle(f"[{entry}] {laser_wl}nm {optical_path}")
calmodel.plot(ax=ax1)
Expand All @@ -41,7 +66,7 @@ def plot_model(calmodel, entry, laser_wl, optical_path, spe_sils=None):
ax1.axvline(x=si_peak, color='black', linestyle='--', linewidth=2, label="Si peak {:.3f} nm".format(si_peak))
if spe_sils is not None:
for spe_sil in spe_sils:
sil_calibrated = calmodel.apply_calibration_x(spe_sil)
sil_calibrated = calmodel.apply_calibration_x(spe_sil, spe_units=spe_units )
try:
fitres, cand = find_peaks(sil_calibrated,
profile="Pearson4",
Expand All @@ -50,7 +75,7 @@ def plot_model(calmodel, entry, laser_wl, optical_path, spe_sils=None):
)
except Exception as err:
fitres = None
print(err)
logger.error(err)
plot_si_peak(spe_sil, sil_calibrated, fitres=fitres, ax=ax2)


Expand Down Expand Up @@ -95,9 +120,13 @@ def plot_distances(pairwise_distances, identifiers):

original = {}
calibrated = {}

matched_peaks = None
for key in upstream["spectracal_*"].keys():

entry = key.replace("spectracal_","")
key_frame = key.replace("spectracal","spectraframe")

data_file = upstream["spectraframe_*"][key_frame]["h5"]
spectra_frame = pd.read_hdf(data_file, key="templates_read")
df_bkg_substracted = spectra_frame.loc[spectra_frame["background"] == "BACKGROUND_SUBTRACTED"]
Expand All @@ -107,22 +136,33 @@ def plot_distances(pairwise_distances, identifiers):
for modelfile in pkl_files:
tags = os.path.basename(modelfile).replace(".pkl", "").split("_")
optical_path = tags[2]
laser_wl = int(tags[1])
laser_wl = int(tags[1])
calmodel = CalibrationModel.from_file(os.path.join(folder_path, modelfile))

if mode == "xy":
with open(os.path.join(folder_path_ycal, f"ycalmodel_{laser_wl}_{optical_path}.pkl"), "rb") as f:
ycalmodel = pickle.load(f)
pattern = os.path.join(folder_path_ycal, f"ycalmodel_{laser_wl}_{optical_path}_*.pkl")
pkl_files = glob.glob(pattern)
ycalmodels = []
for pkl_file in pkl_files:
logger.debug(pkl_file)
with open(pkl_file, "rb") as f:
ycalmodels.append(pickle.load(f))
logger.debug(f"[{key}] Number of relative intensity calibration models: {len(ycalmodels)}")
if len(ycalmodels) == 0:
continue
else:
ycalmodel = None
ycalmodels = None

op_data = df_bkg_substracted.loc[df_bkg_substracted["optical_path"] == optical_path]
spe_sum = None
spe_sil = average_spe(op_data, si_tag).trim_axes(method='x-axis',
boundaries=(520.45-50, 520.45+50))
spe_sil = average_spe(op_data, si_tag)
if spe_sil is None:
continue
plot_model(calmodel, entry, laser_wl, optical_path, [spe_sil])
si_units = get_config_units(_config, key, tag="si")
if si_units == "cm-1":
spe_sil = spe_sil.trim_axes(method='x-axis', boundaries=(520.45-50, 520.45+50))

plot_model(calmodel, entry, laser_wl, optical_path, [spe_sil],spe_units=si_units )
fig, (ax, ax1, ax2, ax3) = plt.subplots(1, 4, figsize=(15, 3))
_id = f"[{entry}] {laser_wl}nm {optical_path}"
fig.suptitle(_id)
Expand All @@ -149,17 +189,18 @@ def plot_distances(pairwise_distances, identifiers):
else:
spe = spe.trim_axes(method='x-axis', boundaries=boundaries)

#print(spe.y_noise_MAD())
logger.debug(f"{entry} {laser_wl} {optical_path} {tag} spe.y_noise_MAD() {spe.y_noise_MAD()}")
# remove pedestal
spe.y = spe.y - np.min(spe.y)
# remove baseline
spe = spe.subtract_baseline_rc1_snip(niter=40)
# x calibration
spe_xcalibrated = calmodel.apply_calibration_x(spe)
# y calibration
if ycalmodel is None:
if ycalmodels is None:
spe_calibrated = spe_xcalibrated
else:
ycalmodel = ycalmodels[0]
spe_xcalibrated = spe_xcalibrated.trim_axes(method="x-axis", boundaries=ycalmodel.ref.raman_shift)
spe_calibrated = ycalmodel.process(spe_xcalibrated)

Expand All @@ -172,6 +213,37 @@ def plot_distances(pairwise_distances, identifiers):
x_range=boundaries, xnew_bins=bins, spline=spline)
#spe_cal_resampled = spe_cal_resampled.subtract_baseline_rc1_snip(niter=40).normalize(strategy=strategy)

if ycalmodels is not None:
spe_xcal_resampled = spe_xcalibrated.resample_spline_filter(
x_range=boundaries, xnew_bins=bins, spline=spline)
_spectra = [spe, spe_xcalibrated, spe_calibrated]
_stages = ["1.original","2.x-clbr","3.y-clbr"]
else:
_spectra = [spe, spe_calibrated]
_stages = ["1.original","2.x-clbr"]
_refs = get_reference_peaks(tag)
if _refs is not None:
logger.info(f"{entry} {laser_wl} {optical_path} match_peaks4analysis {tag}")
df_calib = match_peaks4analysis(
spectra =_spectra,
ref=_refs,
spe_units="cm-1",
find_kw={},
fit_peaks_kw={},
profile=get_profile(tag),
should_fit=True,
match_method = match_mode,
stages=_stages
)
else:
df_calib = None

if df_calib is not None:
df_calib["key"] = entry
df_calib["sample"] = tag
df_calib["optical_path"] = optical_path
df_calib["laser_wl"] = laser_wl
matched_peaks = df_calib if matched_peaks is None else pd.concat([matched_peaks, df_calib])

if plot_resampled:
spe_resampled.plot(ax=axis, label=tag)
Expand All @@ -198,6 +270,8 @@ def plot_distances(pairwise_distances, identifiers):
traceback.print_exc()
axis.grid()

matched_peaks.to_csv(product["matched_peaks"], index=False)

labels = ["original", f"{mode}-calibrated"]

toc_heading(f"Comparison of spectra before and after {mode}-calibration","h2")
Expand All @@ -220,8 +294,7 @@ def plot_distances(pairwise_distances, identifiers):
toc_heading(tag, "h2")
id_calibrated = calibrated[tag]["id"]
if len(id_calibrated) <= 1:
print(f"{tag}: At least 2 optical paths needed to compare calibration results, "
f"found {len(id_calibrated)}: {id_calibrated}")
logger.warning(f"{tag}: At least 2 optical paths needed to compare calibration results, found {len(id_calibrated)}: {id_calibrated}")

y_original = np.array(original[tag]["y"])
y_calibrated = np.array(calibrated[tag]["y"])
Expand All @@ -230,8 +303,7 @@ def plot_distances(pairwise_distances, identifiers):
wavelength = original[tag].get("x", np.arange(y_original.shape[1])) # assume x-values stored here

if len(y_original) != len(y_calibrated):
print(f"Warning: {tag} - different number of spectra "
f"(original={len(y_original)}, calibrated={len(y_calibrated)})")
logger.warning(f"Warning: {tag} - different number of spectra (original={len(y_original)}, calibrated={len(y_calibrated)})")

ids = [id_original, id_calibrated]

Expand Down Expand Up @@ -271,10 +343,10 @@ def plot_distances(pairwise_distances, identifiers):
title=labels[index], ax=ax_biclust)
fig.tight_layout(rect=[0, 0, 1, 0.95]) # leave space for title
except Exception as e:
print(f"Biclustering plot failed for {tag} ({labels[index]}): {e}")
logger.error(f"Biclustering plot failed for {tag} ({labels[index]}): {e}")

except Exception as e:
print(f"Error computing cosine similarity for tag {tag} ({labels[index]}): {e}")
logger.error(f"Error computing cosine similarity for tag {tag} ({labels[index]}): {e}")
traceback.print_exc()

toc_heading("Spectra overlay plot", "h3")
Expand Down Expand Up @@ -322,4 +394,5 @@ def plot_distances(pairwise_distances, identifiers):
plt.show()

except Exception as e:
print(f"Failed to plot spectra for {tag}: {e}")
logger.error(f"Failed to plot spectra for {tag}: {e}")

Loading
Loading