Source code for molr.core.bond_list

"""
BondList class for efficient bond storage and manipulation.

This module provides the BondList class for storing molecular bonds with smart indexing
that automatically adjusts when the parent structure is modified.
"""

from enum import Enum
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np


class BondOrder(Enum):
    """Bond order enumeration."""

    SINGLE = 1.0
    DOUBLE = 2.0
    TRIPLE = 3.0
    AROMATIC = 1.5
    UNKNOWN = 0.0


[docs] class BondList: """ Efficient storage and manipulation of molecular bonds with smart indexing. The BondList class stores bonds as pairs of atom indices with additional metadata such as bond order, detection method, and confidence scores. It supports smart indexing that automatically adjusts bond indices when the parent structure is sliced or modified. Bond storage uses Structure of Arrays (SoA) design: - bonds: (N, 2) array of atom index pairs - bond_order: Bond order (1=single, 2=double, 3=triple, 1.5=aromatic) - bond_type: Bond type classification - detection_method: How the bond was detected - confidence: Confidence score for bond existence Smart indexing features: - Automatic bond index adjustment when structure is sliced - Efficient bond filtering based on atom selections - Bond validation against structure changes Example: >>> bond_list = BondList() >>> bond_list.add_bond(0, 1, bond_order=1.0, bond_type="covalent") >>> bond_list.add_bonds([(2, 3), (3, 4)], bond_orders=[1.0, 2.0]) >>> subset_bonds = bond_list.filter_by_atoms([0, 1, 2]) """
[docs] def __init__(self, n_bonds: int = 0): """ Initialize BondList. Args: n_bonds: Initial number of bonds (default: 0 for dynamic growth) """ self.n_bonds = n_bonds self._capacity = max(n_bonds, 10) # Minimum capacity for dynamic growth # Core bond data - always present self.bonds = np.full((self._capacity, 2), -1, dtype=np.int32) self.bond_order = np.ones(self._capacity, dtype=np.float32) self.bond_type = np.full(self._capacity, "covalent", dtype="U16") # Optional bond metadata - lazy initialization self.detection_method: Optional[np.ndarray] = None self.confidence: Optional[np.ndarray] = None self.distance: Optional[np.ndarray] = None self.is_hydrogen_bond: Optional[np.ndarray] = None self.is_halogen_bond: Optional[np.ndarray] = None # Track custom bond properties self._custom_properties: set = set() # Index mapping for smart indexing (atom_index -> new_index) self._index_mapping: Optional[Dict[int, int]] = None
def _ensure_capacity(self, required_capacity: int) -> None: """ Ensure arrays have sufficient capacity, growing if necessary. Args: required_capacity: Required minimum capacity """ if required_capacity <= self._capacity: return # Grow by 50% or to required capacity, whichever is larger new_capacity = max(int(self._capacity * 1.5), required_capacity) # Resize core arrays new_bonds = np.full((new_capacity, 2), -1, dtype=np.int32) new_bonds[: self._capacity] = self.bonds self.bonds = new_bonds new_bond_order = np.ones(new_capacity, dtype=np.float32) new_bond_order[: self._capacity] = self.bond_order self.bond_order = new_bond_order new_bond_type = np.full(new_capacity, "covalent", dtype="U16") new_bond_type[: self._capacity] = self.bond_type self.bond_type = new_bond_type # Resize optional arrays if they exist for attr_name in [ "detection_method", "confidence", "distance", "is_hydrogen_bond", "is_halogen_bond", ]: attr_value = getattr(self, attr_name) if attr_value is not None: if attr_name == "detection_method": new_array = np.full(new_capacity, "unknown", dtype="U16") elif attr_name in ["is_hydrogen_bond", "is_halogen_bond"]: new_array = np.zeros(new_capacity, dtype=bool) else: new_array = np.zeros(new_capacity, dtype=np.float32) new_array[: self._capacity] = attr_value setattr(self, attr_name, new_array) # Resize custom properties for prop_name in self._custom_properties: prop_array = getattr(self, prop_name) new_array = np.zeros(new_capacity, dtype=prop_array.dtype) new_array[: self._capacity] = prop_array setattr(self, prop_name, new_array) self._capacity = new_capacity def _ensure_property( self, property_name: str, dtype: Any, default_value: Any = None ) -> np.ndarray: """ Ensure optional property exists, creating it if necessary. Args: property_name: Name of the property attribute dtype: NumPy data type default_value: Default value to fill array Returns: The property array """ prop_array = getattr(self, property_name) if prop_array is None: prop_array = np.zeros(self._capacity, dtype=dtype) if default_value is not None: prop_array[: self.n_bonds].fill(default_value) setattr(self, property_name, prop_array) return prop_array # type: ignore
[docs] def add_property( self, name: str, dtype: Any = np.float32, default_value: Any = None ) -> None: """ Add custom property to bonds. Args: name: Name of the property dtype: NumPy data type for the property default_value: Default value to fill existing bonds Raises: ValueError: If property name already exists """ if hasattr(self, name): raise ValueError(f"Property '{name}' already exists") prop_array = np.zeros(self._capacity, dtype=dtype) if default_value is not None: prop_array[: self.n_bonds].fill(default_value) setattr(self, name, prop_array) self._custom_properties.add(name)
[docs] def add_bond( self, atom1: int, atom2: int, bond_order: float = 1.0, bond_type: str = "covalent", **kwargs: Any, ) -> int: """ Add a single bond. Args: atom1: Index of first atom atom2: Index of second atom bond_order: Bond order (1.0=single, 2.0=double, etc.) bond_type: Type of bond **kwargs: Additional bond properties Returns: Index of the added bond Raises: ValueError: If atoms are the same or invalid """ if atom1 == atom2: raise ValueError("Cannot create bond between same atom") if atom1 < 0 or atom2 < 0: raise ValueError("Atom indices must be non-negative") # Ensure capacity self._ensure_capacity(self.n_bonds + 1) # Store bond with consistent ordering (smaller index first) if atom1 > atom2: atom1, atom2 = atom2, atom1 bond_idx = self.n_bonds self.bonds[bond_idx] = [atom1, atom2] self.bond_order[bond_idx] = bond_order self.bond_type[bond_idx] = bond_type # Set optional properties if provided for prop_name, value in kwargs.items(): if hasattr(self, prop_name): # Use _ensure_property for lazy initialization of optional properties if prop_name == "detection_method": prop_array = self._ensure_property( "detection_method", dtype="U10", default_value="unknown" ) prop_array[bond_idx] = value else: prop_array = getattr(self, prop_name) if prop_array is not None: prop_array[bond_idx] = value self.n_bonds += 1 return bond_idx
[docs] def add_bonds( self, bond_pairs: List[Tuple[int, int]], bond_orders: Optional[List[float]] = None, bond_types: Optional[List[str]] = None, **kwargs: Any, ) -> np.ndarray: """ Add multiple bonds efficiently. Args: bond_pairs: List of (atom1, atom2) tuples bond_orders: Optional list of bond orders (default: all 1.0) bond_types: Optional list of bond types (default: all "covalent") **kwargs: Additional properties as lists Returns: Array of bond indices for added bonds Raises: ValueError: If list lengths don't match """ n_new_bonds = len(bond_pairs) if n_new_bonds == 0: return np.array([], dtype=np.int32) # type: ignore # Validate input lengths if bond_orders is not None and len(bond_orders) != n_new_bonds: raise ValueError("bond_orders length must match bond_pairs length") if bond_types is not None and len(bond_types) != n_new_bonds: raise ValueError("bond_types length must match bond_pairs length") for prop_name, values in kwargs.items(): if len(values) != n_new_bonds: raise ValueError(f"{prop_name} length must match bond_pairs length") # Ensure capacity self._ensure_capacity(self.n_bonds + n_new_bonds) # Prepare bond indices start_idx = self.n_bonds end_idx = start_idx + n_new_bonds bond_indices = np.arange(start_idx, end_idx) # Process bond pairs with consistent ordering bonds_array = np.array(bond_pairs, dtype=np.int32) # Sort each bond pair so smaller index comes first bonds_array.sort(axis=1) # Validate bonds if np.any(bonds_array[:, 0] == bonds_array[:, 1]): raise ValueError("Cannot create bonds between same atoms") if np.any(bonds_array < 0): raise ValueError("Atom indices must be non-negative") # Store bonds self.bonds[start_idx:end_idx] = bonds_array # Store bond orders if bond_orders is not None: self.bond_order[start_idx:end_idx] = bond_orders else: self.bond_order[start_idx:end_idx] = 1.0 # Store bond types if bond_types is not None: self.bond_type[start_idx:end_idx] = bond_types else: self.bond_type[start_idx:end_idx] = "covalent" # Store optional properties for prop_name, values in kwargs.items(): if hasattr(self, prop_name): prop_array = getattr(self, prop_name) if prop_array is not None: prop_array[start_idx:end_idx] = values self.n_bonds += n_new_bonds return bond_indices # type: ignore
[docs] def remove_bonds( self, bond_indices: Union[int, List[int], np.ndarray[Any, Any]] ) -> None: """ Remove bonds by index. Args: bond_indices: Bond index or array of bond indices to remove """ if isinstance(bond_indices, int): bond_indices = [bond_indices] elif isinstance(bond_indices, np.ndarray): bond_indices = bond_indices.tolist() # Validate indices bond_indices = [idx for idx in bond_indices if 0 <= idx < self.n_bonds] # type: ignore if not bond_indices: return # Sort in descending order for safe removal bond_indices = sorted(set(bond_indices), reverse=True) # Remove bonds by shifting arrays for idx in bond_indices: # Shift remaining bonds down self.bonds[idx : self.n_bonds - 1] = self.bonds[idx + 1 : self.n_bonds] self.bond_order[idx : self.n_bonds - 1] = self.bond_order[ idx + 1 : self.n_bonds ] self.bond_type[idx : self.n_bonds - 1] = self.bond_type[ idx + 1 : self.n_bonds ] # Shift optional properties for attr_name in [ "detection_method", "confidence", "distance", "is_hydrogen_bond", "is_halogen_bond", ]: attr_value = getattr(self, attr_name) if attr_value is not None: attr_value[idx : self.n_bonds - 1] = attr_value[ idx + 1 : self.n_bonds ] # Shift custom properties for prop_name in self._custom_properties: prop_array = getattr(self, prop_name) prop_array[idx : self.n_bonds - 1] = prop_array[idx + 1 : self.n_bonds] self.n_bonds -= 1
[docs] def get_bonds_for_atom(self, atom_index: int) -> np.ndarray: """ Get all bonds involving a specific atom. Args: atom_index: Index of the atom Returns: Array of bond indices involving the atom """ if self.n_bonds == 0: return np.array([], dtype=np.int32) # type: ignore active_bonds = self.bonds[: self.n_bonds] mask = (active_bonds[:, 0] == atom_index) | (active_bonds[:, 1] == atom_index) return np.where(mask)[0] # type: ignore
[docs] def get_neighbors(self, atom_index: int) -> np.ndarray: """ Get neighbor atoms for a specific atom. Args: atom_index: Index of the atom Returns: Array of neighbor atom indices """ bond_indices = self.get_bonds_for_atom(atom_index) if len(bond_indices) == 0: return np.array([], dtype=np.int32) # type: ignore bonds = self.bonds[bond_indices] # Get the other atom in each bond neighbors = np.where(bonds[:, 0] == atom_index, bonds[:, 1], bonds[:, 0]) return neighbors # type: ignore
[docs] def filter_by_atoms(self, atom_indices: Union[List[int], np.ndarray]) -> "BondList": """ Create new BondList containing only bonds between specified atoms. Args: atom_indices: List or array of atom indices to keep Returns: New BondList with filtered bonds and remapped indices """ atom_set = set(atom_indices) # Find bonds where both atoms are in the selection if self.n_bonds == 0: return BondList(0) active_bonds = self.bonds[: self.n_bonds] mask = np.array([both_atoms_in_set(bond, atom_set) for bond in active_bonds]) if not np.any(mask): return BondList(0) # Create index mapping index_mapping = { old_idx: new_idx for new_idx, old_idx in enumerate(sorted(atom_indices)) } # Create new BondList selected_indices = np.where(mask)[0] new_bond_list = BondList(len(selected_indices)) # Copy bonds with remapped indices for new_idx, old_idx in enumerate(selected_indices): old_bond = active_bonds[old_idx] new_bond = [index_mapping[old_bond[0]], index_mapping[old_bond[1]]] new_bond_list.bonds[new_idx] = new_bond new_bond_list.bond_order[new_idx] = self.bond_order[old_idx] new_bond_list.bond_type[new_idx] = self.bond_type[old_idx] # Copy optional properties for attr_name in [ "detection_method", "confidence", "distance", "is_hydrogen_bond", "is_halogen_bond", ]: old_attr = getattr(self, attr_name) if old_attr is not None: new_attr = new_bond_list._ensure_property(attr_name, old_attr.dtype) new_attr[new_idx] = old_attr[old_idx] # Copy custom properties for prop_name in self._custom_properties: old_prop = getattr(self, prop_name) if not hasattr(new_bond_list, prop_name): new_bond_list.add_property(prop_name, old_prop.dtype) new_prop = getattr(new_bond_list, prop_name) new_prop[new_idx] = old_prop[old_idx] new_bond_list._custom_properties = self._custom_properties.copy() return new_bond_list
[docs] def get_bond_matrix(self, n_atoms: int) -> np.ndarray: """ Create bond adjacency matrix. Args: n_atoms: Total number of atoms in structure Returns: (n_atoms, n_atoms) boolean adjacency matrix """ matrix = np.zeros((n_atoms, n_atoms), dtype=bool) if self.n_bonds > 0: active_bonds = self.bonds[: self.n_bonds] valid_mask = np.all(active_bonds < n_atoms, axis=1) valid_bonds = active_bonds[valid_mask] matrix[valid_bonds[:, 0], valid_bonds[:, 1]] = True matrix[valid_bonds[:, 1], valid_bonds[:, 0]] = True return matrix # type: ignore
[docs] def validate_bonds(self, n_atoms: int) -> Tuple[bool, List[int]]: """ Validate that all bonds reference valid atom indices. Args: n_atoms: Number of atoms in the structure Returns: Tuple of (all_valid, list_of_invalid_bond_indices) """ if self.n_bonds == 0: return True, [] active_bonds = self.bonds[: self.n_bonds] invalid_mask = np.any((active_bonds < 0) | (active_bonds >= n_atoms), axis=1) invalid_indices = np.where(invalid_mask)[0].tolist() return len(invalid_indices) == 0, invalid_indices
[docs] def __len__(self) -> int: """Return number of bonds.""" return self.n_bonds
[docs] def __getitem__( self, index: Union[int, slice, np.ndarray] ) -> Union[Tuple[int, int], "BondList"]: """ Get bond(s) by index. Args: index: Integer, slice, or array for indexing Returns: Single bond tuple or new BondList with selected bonds """ if isinstance(index, int): if index < 0 or index >= self.n_bonds: raise IndexError("Bond index out of range") return tuple(self.bonds[index]) # Handle slice or array indexing if isinstance(index, slice): indices = list(range(*index.indices(self.n_bonds))) else: indices = list(index) # Create new BondList with selected bonds new_bond_list = BondList(len(indices)) for new_idx, old_idx in enumerate(indices): new_bond_list.bonds[new_idx] = self.bonds[old_idx] new_bond_list.bond_order[new_idx] = self.bond_order[old_idx] new_bond_list.bond_type[new_idx] = self.bond_type[old_idx] # Copy optional properties for attr_name in [ "detection_method", "confidence", "distance", "is_hydrogen_bond", "is_halogen_bond", ]: old_attr = getattr(self, attr_name) if old_attr is not None: new_attr = new_bond_list._ensure_property(attr_name, old_attr.dtype) new_attr[new_idx] = old_attr[old_idx] # Copy custom properties for prop_name in self._custom_properties: old_prop = getattr(self, prop_name) if not hasattr(new_bond_list, prop_name): new_bond_list.add_property(prop_name, old_prop.dtype) new_prop = getattr(new_bond_list, prop_name) new_prop[new_idx] = old_prop[old_idx] new_bond_list._custom_properties = self._custom_properties.copy() return new_bond_list
[docs] def __repr__(self) -> str: """String representation of BondList.""" return f"BondList(n_bonds={self.n_bonds})"
[docs] def __str__(self) -> str: """Detailed string representation.""" lines = [f"BondList with {self.n_bonds} bonds"] if self.n_bonds > 0: # Show bond type distribution active_types = self.bond_type[: self.n_bonds] unique_types, counts = np.unique(active_types, return_counts=True) type_info = ", ".join( [ f"{count} {bond_type}" for bond_type, count in zip(unique_types, counts) ] ) lines.append(f"Bond types: {type_info}") # Show bond order distribution active_orders = self.bond_order[: self.n_bonds] unique_orders, counts = np.unique(active_orders, return_counts=True) order_info = ", ".join( [ f"{count} order-{order:.1f}" for order, count in zip(unique_orders, counts) ] ) lines.append(f"Bond orders: {order_info}") # Show optional properties status optional_count = sum( 1 for attr in [ "detection_method", "confidence", "distance", "is_hydrogen_bond", "is_halogen_bond", ] if getattr(self, attr) is not None ) custom_count = len(self._custom_properties) lines.append( f"Properties: {optional_count} optional, {custom_count} custom" ) return "\n".join(lines)
def both_atoms_in_set(bond: np.ndarray, atom_set: set) -> bool: """ Check if both atoms in a bond are in the given set. Args: bond: Array with two atom indices atom_set: Set of atom indices Returns: True if both atoms are in the set """ return bond[0] in atom_set and bond[1] in atom_set