From 00dfb1945c63e3d7c879209da9dc8b181130f4b2 Mon Sep 17 00:00:00 2001 From: Anna Kravchenko Date: Tue, 14 Oct 2025 13:11:47 +0200 Subject: [PATCH 01/11] new parameter alig_fname added to defaults --- src/haddock/modules/analysis/caprieval/defaults.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/haddock/modules/analysis/caprieval/defaults.yaml b/src/haddock/modules/analysis/caprieval/defaults.yaml index 62821a00e2..87272eedcd 100644 --- a/src/haddock/modules/analysis/caprieval/defaults.yaml +++ b/src/haddock/modules/analysis/caprieval/defaults.yaml @@ -151,6 +151,17 @@ sort_ascending: group: analysis explevel: easy +alig_fname: + default: '' + type: file + title: Alignment filename + short description: Filename of the file containing alignment information, izone format. + long description: Alignment file in izone format containing the mapping between residues. E.g. "ZONE A1:A5" means that + residue 1 of chain A of the model corresponds to residue 5 of chain A or the reference. + Note that if this parameter is provided, the 'alignment_method' parameter is ignored. + group: analysis + explevel: easy + alignment_method: default: sequence type: string From 3cd52890e2417ad7421aecb4fa4cf35f4f27522f Mon Sep 17 00:00:00 2001 From: Anna Kravchenko Date: Tue, 14 Oct 2025 17:18:21 +0200 Subject: [PATCH 02/11] add align_custom to libalign --- src/haddock/libs/libalign.py | 82 +++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 2 deletions(-) diff --git a/src/haddock/libs/libalign.py b/src/haddock/libs/libalign.py index a42cd5344e..f5d6b94bf9 100644 --- a/src/haddock/libs/libalign.py +++ b/src/haddock/libs/libalign.py @@ -11,6 +11,7 @@ * :py:func:`pdb2fastadic` * :py:func:`get_atoms` * :py:func:`get_align` +* :pt:func:`align_custom` * :py:func:`align_struct` * :py:func:`align_seq` * :py:func:`make_range` @@ -770,7 +771,7 @@ def pdb2fastadic(pdb_f: PDBPath) -> dict[str, dict[int, str]]: def get_align( - method: str, lovoalign_exec: FilePath + method: str, lovoalign_exec: FilePath, alig_fname: Optional[FilePath] = None ) -> partial[dict[str, dict[int, int]]]: """ Get the alignment function. @@ -783,15 +784,25 @@ def get_align( lovoalign_exec : str Path to the lovoalign executable. + alig_fname : str + Path to the custom alignment file, optional + Returns ------- align_func : functools.partial desired alignment function """ - if method == "structure": + if alig_fname: + if not Path(alig_fname).exists(): + raise FileNotFoundError(f"Custom alignment file {alig_fname} not found") + align_func = partial(align_custom, custom_alig_file = Path(alig_fname)) + + elif method == "structure": align_func = partial(align_strct, lovoalign_exec=lovoalign_exec) + elif method == "sequence": align_func = partial(align_seq) + else: available_alns = ("sequence", "structure") raise ValueError( @@ -801,6 +812,73 @@ def get_align( return align_func +def align_custom( + reference: PDBFile, + model: PDBFile, + output_path: FilePath, + custom_alig_file: FilePath, +) -> tuple[dict[str, dict[int, int]], dict[str, str]]: + """ + Parse custom alignment file to extract numbering relationship. + + Parameters + ---------- + reference : :py:class:`haddock.libs.libontology.PDBFile` + + model : :py:class:`haddock.libs.libontology.PDBFile` + + output_path : Path + + custom_alig_file : Path + Path to the custom .izone alignment file + + Returns + ------- + numbering_dic : dict + dictionary of residue mapping (one per chain) + + model2ref_chain_dict : dict + dictionary of chain mapping + + """ + numbering_dic: dict[str, dict[int, int]] = {} + model2ref_chain_dict: dict[str, str] = {} + + # read align file, skip everything that is not 'ZONE' + with open(custom_alig_file, 'r') as f: + zone_lines = (line.strip() for line in f if line.strip().startswith('ZONE')) + + for line in zone_lines: + if len(line.split()) <2: + log.warning(f"Unexpected {line} in {custom_alig_file}, skipping") + continue + + mapping = line.split()[1] + if ':' not in mapping: + log.warning(f"Missing colon in mapping: {line}") + continue + + # constructs dictionaries + try: + model_side, ref_side = mapping.split(':') + model_chain = model_side[0] + model_resid = int(model_side[1:]) + ref_chain = ref_side[0] + ref_resid = int(ref_side[1:]) + + if ref_chain not in numbering_dic: + numbering_dic[ref_chain] = {} + + numbering_dic[ref_chain][model_resid] = ref_resid + model2ref_chain_dict[model_chain] = ref_chain + + except (ValueError, IndexError) as e: + log.warning(f"Could not parse line: {line} ({e})") + continue + + return numbering_dic, model2ref_chain_dict + + def align_strct( reference: PDBFile, model: PDBFile, From ab340fe7a652def042deffd8c8fa7b33b4bff7bb Mon Sep 17 00:00:00 2001 From: Anna Kravchenko Date: Tue, 14 Oct 2025 17:18:58 +0200 Subject: [PATCH 03/11] upd run() of caprieval --- src/haddock/modules/analysis/caprieval/capri.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/haddock/modules/analysis/caprieval/capri.py b/src/haddock/modules/analysis/caprieval/capri.py index 5d16c69275..7e1fae6a0e 100644 --- a/src/haddock/modules/analysis/caprieval/capri.py +++ b/src/haddock/modules/analysis/caprieval/capri.py @@ -612,6 +612,7 @@ def run(self) -> Union[None, "CAPRI"]: align_func = get_align( method=self.params["alignment_method"], lovoalign_exec=self.params["lovoalign_exec"], + alig_fname=self.params["alig_fname"], ) self.model2ref_numbering, self.model2ref_chain_dict = align_func( self.reference, self.model, self.path From 2728c77193f0ce1b20f5715f9363a19ce1a861c0 Mon Sep 17 00:00:00 2001 From: Anna Kravchenko Date: Tue, 14 Oct 2025 17:42:42 +0200 Subject: [PATCH 04/11] sneaky typos! --- src/haddock/modules/analysis/caprieval/defaults.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/haddock/modules/analysis/caprieval/defaults.yaml b/src/haddock/modules/analysis/caprieval/defaults.yaml index 87272eedcd..1f2de6dfaa 100644 --- a/src/haddock/modules/analysis/caprieval/defaults.yaml +++ b/src/haddock/modules/analysis/caprieval/defaults.yaml @@ -155,8 +155,8 @@ alig_fname: default: '' type: file title: Alignment filename - short description: Filename of the file containing alignment information, izone format. - long description: Alignment file in izone format containing the mapping between residues. E.g. "ZONE A1:A5" means that + short: Filename of the file containing alignment information, izone format. + long: Alignment file in izone format containing the mapping between residues. E.g. "ZONE A1:A5" means that residue 1 of chain A of the model corresponds to residue 5 of chain A or the reference. Note that if this parameter is provided, the 'alignment_method' parameter is ignored. group: analysis From 3a6fd43f17ba030502f2729f1b0d7144c6daf73b Mon Sep 17 00:00:00 2001 From: Anna Kravchenko Date: Wed, 15 Oct 2025 16:23:41 +0200 Subject: [PATCH 05/11] fix current tests --- tests/test_module_caprieval.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_module_caprieval.py b/tests/test_module_caprieval.py index 12da92c794..0dc5fabdb4 100644 --- a/tests/test_module_caprieval.py +++ b/tests/test_module_caprieval.py @@ -855,6 +855,7 @@ def test_capri_run(mocker, monkeypatch): "fnat_cutoff": 10, "irmsd_cutoff": 10, "keep_hetatm": False, + "alig_fname": None, }, ) rand_fnat = random.random() From 9f3ce2c153d434632432ea9311780fd76c746786 Mon Sep 17 00:00:00 2001 From: Anna Kravchenko Date: Thu, 16 Oct 2025 12:07:12 +0200 Subject: [PATCH 06/11] better parameter description --- src/haddock/modules/analysis/caprieval/defaults.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/haddock/modules/analysis/caprieval/defaults.yaml b/src/haddock/modules/analysis/caprieval/defaults.yaml index 1f2de6dfaa..378c6a6f56 100644 --- a/src/haddock/modules/analysis/caprieval/defaults.yaml +++ b/src/haddock/modules/analysis/caprieval/defaults.yaml @@ -159,6 +159,7 @@ alig_fname: long: Alignment file in izone format containing the mapping between residues. E.g. "ZONE A1:A5" means that residue 1 of chain A of the model corresponds to residue 5 of chain A or the reference. Note that if this parameter is provided, the 'alignment_method' parameter is ignored. + Note that all caprieval metrics will be calculated using exclusively the chains and residues f srom izone file. group: analysis explevel: easy From 147371d8f5c89b00e31e53cd9e6da8b24c9744d8 Mon Sep 17 00:00:00 2001 From: Anna Kravchenko Date: Thu, 16 Oct 2025 12:17:18 +0200 Subject: [PATCH 07/11] change explevel to expert --- src/haddock/modules/analysis/caprieval/defaults.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/haddock/modules/analysis/caprieval/defaults.yaml b/src/haddock/modules/analysis/caprieval/defaults.yaml index 378c6a6f56..a7158e6924 100644 --- a/src/haddock/modules/analysis/caprieval/defaults.yaml +++ b/src/haddock/modules/analysis/caprieval/defaults.yaml @@ -161,7 +161,7 @@ alig_fname: Note that if this parameter is provided, the 'alignment_method' parameter is ignored. Note that all caprieval metrics will be calculated using exclusively the chains and residues f srom izone file. group: analysis - explevel: easy + explevel: expert alignment_method: default: sequence From 26adc451416b63a135d4594ba7caa90664107c3a Mon Sep 17 00:00:00 2001 From: Anna Kravchenko Date: Fri, 24 Oct 2025 17:56:08 +0200 Subject: [PATCH 08/11] starting on tests --- tests/test_libalign.py | 45 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/tests/test_libalign.py b/tests/test_libalign.py index 343ae66eb1..0a480124fb 100644 --- a/tests/test_libalign.py +++ b/tests/test_libalign.py @@ -22,6 +22,7 @@ make_range, pdb2fastadic, rearrange_xyz_files, + align_custom, ) from . import golden_data @@ -567,3 +568,47 @@ def test_check_chains(): assert obs_r_chain == exp_r_chain assert obs_l_chain == exp_l_chain +### new +def test_align_custom_basic(): + """Test basic custom alignment file parsing. + Use protdna_complex_1.pdb and protdna_complex_2.pdb and a 1:1 + custom alignment file protdna_complex.izone + """ + ref = Path(golden_data, "protdna_complex_1.pdb") + mod = Path(golden_data, "protdna_complex_2.pdb") + custom_izone = Path(golden_data, "protdna_complex.izone") + + observed_numb_dic, observed_chm_dict = align_custom( + reference=ref, + model=mod, + output_path=golden_data, + custom_alig_file=custom_izone + ) + + expected_chm_dict = {"A": "A", "B": "B"} + + assert isinstance(observed_numb_dic, dict) + assert "A" in observed_numb_dic + assert "B" in observed_numb_dic + assert isinstance(observed_numb_dic["A"], dict) + assert isinstance(observed_numb_dic["B"], dict) + # protein has 64 residues, each should be in mapping + assert len(observed_numb_dic["A"]) == 64 + # same for dna + assert len(observed_numb_dic["B"]) == 22 + # several random residues + assert observed_numb_dic["A"][-1] == -1 + assert observed_numb_dic["B"][38] == 38 + assert isinstance(observed_chm_dict, dict) + assert observed_chm_dict == expected_chm_dict + + # all mapped residues are integers + for chain, mappings in observed_numb_dic.items(): + for model_res, ref_res in mappings.items(): + assert isinstance(model_res, int) + assert isinstance(ref_res, int) + + # all mapped chains are strings + for model_chain, ref_chain in observed_chm_dict.items(): + assert isinstance(model_chain, str) + assert isinstance(ref_chain, str) \ No newline at end of file From 620e8b7ee6704552d0d10df0de1cf22995632adb Mon Sep 17 00:00:00 2001 From: Anna Kravchenko Date: Fri, 24 Oct 2025 18:09:35 +0200 Subject: [PATCH 09/11] add example izone file --- tests/golden_data/protdna_complex.izone | 88 +++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 tests/golden_data/protdna_complex.izone diff --git a/tests/golden_data/protdna_complex.izone b/tests/golden_data/protdna_complex.izone new file mode 100644 index 0000000000..d5fa767728 --- /dev/null +++ b/tests/golden_data/protdna_complex.izone @@ -0,0 +1,88 @@ +# protein +ZONE A-1:A-1 +ZONE A0:A0 +ZONE A1:A1 +ZONE A2:A2 +ZONE A3:A3 +ZONE A4:A4 +ZONE A5:A5 +ZONE A6:A6 +ZONE A7:A7 +ZONE A8:A8 +ZONE A9:A9 +ZONE A10:A10 +ZONE A11:A11 +ZONE A12:A12 +ZONE A13:A13 +ZONE A14:A14 +ZONE A15:A15 +ZONE A16:A16 +ZONE A17:A17 +ZONE A18:A18 +ZONE A19:A19 +ZONE A20:A20 +ZONE A21:A21 +ZONE A22:A22 +ZONE A23:A23 +ZONE A24:A24 +ZONE A25:A25 +ZONE A26:A26 +ZONE A27:A27 +ZONE A28:A28 +ZONE A29:A29 +ZONE A30:A30 +ZONE A31:A31 +ZONE A32:A32 +ZONE A33:A33 +ZONE A34:A34 +ZONE A35:A35 +ZONE A36:A36 +ZONE A37:A37 +ZONE A38:A38 +ZONE A39:A39 +ZONE A40:A40 +ZONE A41:A41 +ZONE A42:A42 +ZONE A43:A43 +ZONE A44:A44 +ZONE A45:A45 +ZONE A46:A46 +ZONE A47:A47 +ZONE A48:A48 +ZONE A49:A49 +ZONE A50:A50 +ZONE A51:A51 +ZONE A52:A52 +ZONE A53:A53 +ZONE A54:A54 +ZONE A55:A55 +ZONE A56:A56 +ZONE A57:A57 +ZONE A58:A58 +ZONE A59:A59 +ZONE A60:A60 +ZONE A61:A61 +ZONE A62:A62 +# DNA +ZONE B1:B1 +ZONE B2:B2 +ZONE B3:B3 +ZONE B4:B4 +ZONE B5:B5 +ZONE B6:B6 +ZONE B7:B7 +ZONE B8:B8 +ZONE B9:B9 +ZONE B10:B10 +ZONE B11:B11 +ZONE B28:B28 +ZONE B29:B29 +ZONE B30:B30 +ZONE B31:B31 +ZONE B32:B32 +ZONE B33:B33 +ZONE B34:B34 +ZONE B35:B35 +ZONE B36:B36 +ZONE B37:B37 +ZONE B38:B38 From b6f4110495ae79779eca145c445b2882d2379d0b Mon Sep 17 00:00:00 2001 From: Anna Kravchenko Date: Mon, 10 Nov 2025 16:21:32 +0100 Subject: [PATCH 10/11] rename alig_fname to align_fname --- src/haddock/libs/libalign.py | 12 ++++++------ src/haddock/modules/analysis/caprieval/capri.py | 2 +- src/haddock/modules/analysis/caprieval/defaults.yaml | 3 ++- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/haddock/libs/libalign.py b/src/haddock/libs/libalign.py index f5d6b94bf9..c2c1983c4f 100644 --- a/src/haddock/libs/libalign.py +++ b/src/haddock/libs/libalign.py @@ -771,7 +771,7 @@ def pdb2fastadic(pdb_f: PDBPath) -> dict[str, dict[int, str]]: def get_align( - method: str, lovoalign_exec: FilePath, alig_fname: Optional[FilePath] = None + method: str, lovoalign_exec: FilePath, align_fname: Optional[FilePath] = None ) -> partial[dict[str, dict[int, int]]]: """ Get the alignment function. @@ -784,7 +784,7 @@ def get_align( lovoalign_exec : str Path to the lovoalign executable. - alig_fname : str + align_fname : str Path to the custom alignment file, optional Returns @@ -792,10 +792,10 @@ def get_align( align_func : functools.partial desired alignment function """ - if alig_fname: - if not Path(alig_fname).exists(): - raise FileNotFoundError(f"Custom alignment file {alig_fname} not found") - align_func = partial(align_custom, custom_alig_file = Path(alig_fname)) + if align_fname: + if not Path(align_fname).exists(): + raise FileNotFoundError(f"Custom alignment file {align_fname} not found") + align_func = partial(align_custom, custom_alig_file = Path(align_fname)) elif method == "structure": align_func = partial(align_strct, lovoalign_exec=lovoalign_exec) diff --git a/src/haddock/modules/analysis/caprieval/capri.py b/src/haddock/modules/analysis/caprieval/capri.py index 7e1fae6a0e..b9c9b96199 100644 --- a/src/haddock/modules/analysis/caprieval/capri.py +++ b/src/haddock/modules/analysis/caprieval/capri.py @@ -612,7 +612,7 @@ def run(self) -> Union[None, "CAPRI"]: align_func = get_align( method=self.params["alignment_method"], lovoalign_exec=self.params["lovoalign_exec"], - alig_fname=self.params["alig_fname"], + align_fname=self.params["align_fname"], ) self.model2ref_numbering, self.model2ref_chain_dict = align_func( self.reference, self.model, self.path diff --git a/src/haddock/modules/analysis/caprieval/defaults.yaml b/src/haddock/modules/analysis/caprieval/defaults.yaml index a7158e6924..dc62d2d18d 100644 --- a/src/haddock/modules/analysis/caprieval/defaults.yaml +++ b/src/haddock/modules/analysis/caprieval/defaults.yaml @@ -151,7 +151,7 @@ sort_ascending: group: analysis explevel: easy -alig_fname: +align_fname: default: '' type: file title: Alignment filename @@ -160,6 +160,7 @@ alig_fname: residue 1 of chain A of the model corresponds to residue 5 of chain A or the reference. Note that if this parameter is provided, the 'alignment_method' parameter is ignored. Note that all caprieval metrics will be calculated using exclusively the chains and residues f srom izone file. + Note that there is no prior consistency verification between izone and pdb files - incorrect izone may crash caprieval. group: analysis explevel: expert From f369cf1d4aa3916604b37530e50acc5ee1b4b0b2 Mon Sep 17 00:00:00 2001 From: Anna Kravchenko Date: Mon, 10 Nov 2025 16:40:54 +0100 Subject: [PATCH 11/11] rename alig_fname to align_fname v2 --- tests/test_module_caprieval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_module_caprieval.py b/tests/test_module_caprieval.py index 0dc5fabdb4..3f9e240c57 100644 --- a/tests/test_module_caprieval.py +++ b/tests/test_module_caprieval.py @@ -855,7 +855,7 @@ def test_capri_run(mocker, monkeypatch): "fnat_cutoff": 10, "irmsd_cutoff": 10, "keep_hetatm": False, - "alig_fname": None, + "align_fname": None, }, ) rand_fnat = random.random()