Skip to content
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 21 additions & 8 deletions models/rfd3/src/rfd3/inference/input_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ class DesignInputSpecification(BaseModel):
# Extra args:
length: Optional[str] = Field(None, description="Length range as 'min-max' or int. Constrains length of contig if provided")
ligand: Optional[str] = Field(None, description="Ligand name or index to include in design.")
allow_ligand_on_chain_a: bool = Field(False, description="If True, suppress the error when a ligand is on chain A (the protein chain). Use with caution — chain ID is leaked to the model.")
cif_parser_args: Optional[Dict[str, Any]] = Field(None, description="CIF parser arguments")
extra: Optional[Dict[str, Any]] = Field(default_factory=dict, description="Extra metadata to include in output (useful for logging additional info in metadata)")
dialect: int = Field(2, description="RFdiffusion3 input dialect. 1: legacy, 2: release.")
Expand Down Expand Up @@ -672,14 +673,26 @@ def _append_ligand(self, atom_array, atom_array_input_annotated):
+ list(atom_array_input_annotated.get_annotation_categories())
),
)
# Offset ligand residue ids based on the original input to avoid clashes
# with any newly created residues (matches legacy behaviour).
ligand_array.res_id = (
ligand_array.res_id
- np.min(ligand_array.res_id)
+ np.max(atom_array.res_id)
+ 1
)
# Error if any ligand sits on chain A (the protein chain) unless
# explicitly overridden — chain ID is leaked to the model so this
# is a significant difference from the expected convention.
ligand_chains = np.unique(ligand_array.chain_id)
if "A" in ligand_chains and not self.allow_ligand_on_chain_a:
raise ValueError(
f"Ligand found on chain A, which is reserved for the protein. "
f"Ligand chain(s): {ligand_chains.tolist()}. "
f"Place ligands on separate chains (B, C, D, ...) or set "
f"'allow_ligand_on_chain_a: true' to override this check."
)
# Reset ligand res_id to start from 1 per chain, matching the
# convention AF3 uses in its output CIF files. Use dense
# rank-based renumbering so gaps in the original numbering
# (e.g. res_id 850, 900) become sequential (1, 2).

Copilot AI Apr 5, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ligand-on-chain check is hard-coded to chain ID "A", but in partial-diffusion mode the protein chain ID is taken from the input (start_chain = atom_array_in.chain_id[0]) and may not be "A". As a result, the check can raise a spurious error (a ligand on chain A while the protein sits on a different chain) or miss a genuine collision (a ligand sharing a non-A protein chain). Consider checking whether the ligand shares a chain with any protein atoms in atom_array — e.g., np.intersect1d(np.unique(atom_array.chain_id[atom_array.is_protein]), ligand_chains) — rather than assuming the protein chain is always "A".

Suggested change
f"Ligand found on chain A, which is reserved for the protein. "
f"Ligand chain(s): {ligand_chains.tolist()}. "
f"Place ligands on separate chains (B, C, D, ...) or set "
f"'allow_ligand_on_chain_a: true' to override this check."
)
# Reset ligand res_id to start from 1 per chain, matching the
# convention AF3 uses in its output CIF files. Use dense
# rank-based renumbering so gaps in the original numbering
# (e.g. res_id 850, 900) become sequential (1, 2).
# Error if any ligand shares a chain with protein atoms unless
# explicitly overridden — chain ID is leaked to the model so this
# is a significant difference from the expected convention.
ligand_chains = np.unique(ligand_array.chain_id)
protein_chains = np.unique(atom_array.chain_id[atom_array.is_protein])
overlapping_chains = np.intersect1d(protein_chains, ligand_chains)
if overlapping_chains.size > 0 and not self.allow_ligand_on_chain_a:
raise ValueError(
f"Ligand found on protein chain(s): {overlapping_chains.tolist()}. "
f"Protein chain(s): {protein_chains.tolist()}. "
f"Ligand chain(s): {ligand_chains.tolist()}. "
f"Place ligands on separate chains or set "

Copilot uses AI. Check for mistakes.
for chain in ligand_chains:
mask = ligand_array.chain_id == chain
chain_res_ids = ligand_array.res_id[mask]
_, inverse = np.unique(chain_res_ids, return_inverse=True)
ligand_array.res_id[mask] = inverse + 1
# Harmonize conditioning annotations before concatenation: biotite's
# concatenate only preserves annotations present in ALL arrays (set
# intersection), so mismatched optional conditioning annotations
Expand Down
Loading