"""
BondList class for efficient bond storage and manipulation.
This module provides the BondList class for storing molecular bonds with smart indexing
that automatically adjusts when the parent structure is modified.
"""
from enum import Enum
from typing import Any, Dict, List, Optional, Tuple, Union
import numpy as np
class BondOrder(Enum):
"""Bond order enumeration."""
SINGLE = 1.0
DOUBLE = 2.0
TRIPLE = 3.0
AROMATIC = 1.5
UNKNOWN = 0.0
[docs]
class BondList:
"""
Efficient storage and manipulation of molecular bonds with smart indexing.
The BondList class stores bonds as pairs of atom indices with additional metadata
such as bond order, detection method, and confidence scores. It supports smart
indexing that automatically adjusts bond indices when the parent structure is
sliced or modified.
Bond storage uses Structure of Arrays (SoA) design:
- bonds: (N, 2) array of atom index pairs
- bond_order: Bond order (1=single, 2=double, 3=triple, 1.5=aromatic)
- bond_type: Bond type classification
- detection_method: How the bond was detected
- confidence: Confidence score for bond existence
Smart indexing features:
- Automatic bond index adjustment when structure is sliced
- Efficient bond filtering based on atom selections
- Bond validation against structure changes
Example:
>>> bond_list = BondList()
>>> bond_list.add_bond(0, 1, bond_order=1.0, bond_type="covalent")
>>> bond_list.add_bonds([(2, 3), (3, 4)], bond_orders=[1.0, 2.0])
>>> subset_bonds = bond_list.filter_by_atoms([0, 1, 2])
"""
[docs]
def __init__(self, n_bonds: int = 0):
"""
Initialize BondList.
Args:
n_bonds: Initial number of bonds (default: 0 for dynamic growth)
"""
self.n_bonds = n_bonds
self._capacity = max(n_bonds, 10) # Minimum capacity for dynamic growth
# Core bond data - always present
self.bonds = np.full((self._capacity, 2), -1, dtype=np.int32)
self.bond_order = np.ones(self._capacity, dtype=np.float32)
self.bond_type = np.full(self._capacity, "covalent", dtype="U16")
# Optional bond metadata - lazy initialization
self.detection_method: Optional[np.ndarray] = None
self.confidence: Optional[np.ndarray] = None
self.distance: Optional[np.ndarray] = None
self.is_hydrogen_bond: Optional[np.ndarray] = None
self.is_halogen_bond: Optional[np.ndarray] = None
# Track custom bond properties
self._custom_properties: set = set()
# Index mapping for smart indexing (atom_index -> new_index)
self._index_mapping: Optional[Dict[int, int]] = None
def _ensure_capacity(self, required_capacity: int) -> None:
"""
Ensure arrays have sufficient capacity, growing if necessary.
Args:
required_capacity: Required minimum capacity
"""
if required_capacity <= self._capacity:
return
# Grow by 50% or to required capacity, whichever is larger
new_capacity = max(int(self._capacity * 1.5), required_capacity)
# Resize core arrays
new_bonds = np.full((new_capacity, 2), -1, dtype=np.int32)
new_bonds[: self._capacity] = self.bonds
self.bonds = new_bonds
new_bond_order = np.ones(new_capacity, dtype=np.float32)
new_bond_order[: self._capacity] = self.bond_order
self.bond_order = new_bond_order
new_bond_type = np.full(new_capacity, "covalent", dtype="U16")
new_bond_type[: self._capacity] = self.bond_type
self.bond_type = new_bond_type
# Resize optional arrays if they exist
for attr_name in [
"detection_method",
"confidence",
"distance",
"is_hydrogen_bond",
"is_halogen_bond",
]:
attr_value = getattr(self, attr_name)
if attr_value is not None:
if attr_name == "detection_method":
new_array = np.full(new_capacity, "unknown", dtype="U16")
elif attr_name in ["is_hydrogen_bond", "is_halogen_bond"]:
new_array = np.zeros(new_capacity, dtype=bool)
else:
new_array = np.zeros(new_capacity, dtype=np.float32)
new_array[: self._capacity] = attr_value
setattr(self, attr_name, new_array)
# Resize custom properties
for prop_name in self._custom_properties:
prop_array = getattr(self, prop_name)
new_array = np.zeros(new_capacity, dtype=prop_array.dtype)
new_array[: self._capacity] = prop_array
setattr(self, prop_name, new_array)
self._capacity = new_capacity
def _ensure_property(
self, property_name: str, dtype: Any, default_value: Any = None
) -> np.ndarray:
"""
Ensure optional property exists, creating it if necessary.
Args:
property_name: Name of the property attribute
dtype: NumPy data type
default_value: Default value to fill array
Returns:
The property array
"""
prop_array = getattr(self, property_name)
if prop_array is None:
prop_array = np.zeros(self._capacity, dtype=dtype)
if default_value is not None:
prop_array[: self.n_bonds].fill(default_value)
setattr(self, property_name, prop_array)
return prop_array # type: ignore
[docs]
def add_property(
self, name: str, dtype: Any = np.float32, default_value: Any = None
) -> None:
"""
Add custom property to bonds.
Args:
name: Name of the property
dtype: NumPy data type for the property
default_value: Default value to fill existing bonds
Raises:
ValueError: If property name already exists
"""
if hasattr(self, name):
raise ValueError(f"Property '{name}' already exists")
prop_array = np.zeros(self._capacity, dtype=dtype)
if default_value is not None:
prop_array[: self.n_bonds].fill(default_value)
setattr(self, name, prop_array)
self._custom_properties.add(name)
[docs]
def add_bond(
self,
atom1: int,
atom2: int,
bond_order: float = 1.0,
bond_type: str = "covalent",
**kwargs: Any,
) -> int:
"""
Add a single bond.
Args:
atom1: Index of first atom
atom2: Index of second atom
bond_order: Bond order (1.0=single, 2.0=double, etc.)
bond_type: Type of bond
**kwargs: Additional bond properties
Returns:
Index of the added bond
Raises:
ValueError: If atoms are the same or invalid
"""
if atom1 == atom2:
raise ValueError("Cannot create bond between same atom")
if atom1 < 0 or atom2 < 0:
raise ValueError("Atom indices must be non-negative")
# Ensure capacity
self._ensure_capacity(self.n_bonds + 1)
# Store bond with consistent ordering (smaller index first)
if atom1 > atom2:
atom1, atom2 = atom2, atom1
bond_idx = self.n_bonds
self.bonds[bond_idx] = [atom1, atom2]
self.bond_order[bond_idx] = bond_order
self.bond_type[bond_idx] = bond_type
# Set optional properties if provided
for prop_name, value in kwargs.items():
if hasattr(self, prop_name):
# Use _ensure_property for lazy initialization of optional properties
if prop_name == "detection_method":
prop_array = self._ensure_property(
"detection_method", dtype="U10", default_value="unknown"
)
prop_array[bond_idx] = value
else:
prop_array = getattr(self, prop_name)
if prop_array is not None:
prop_array[bond_idx] = value
self.n_bonds += 1
return bond_idx
[docs]
def add_bonds(
self,
bond_pairs: List[Tuple[int, int]],
bond_orders: Optional[List[float]] = None,
bond_types: Optional[List[str]] = None,
**kwargs: Any,
) -> np.ndarray:
"""
Add multiple bonds efficiently.
Args:
bond_pairs: List of (atom1, atom2) tuples
bond_orders: Optional list of bond orders (default: all 1.0)
bond_types: Optional list of bond types (default: all "covalent")
**kwargs: Additional properties as lists
Returns:
Array of bond indices for added bonds
Raises:
ValueError: If list lengths don't match
"""
n_new_bonds = len(bond_pairs)
if n_new_bonds == 0:
return np.array([], dtype=np.int32) # type: ignore
# Validate input lengths
if bond_orders is not None and len(bond_orders) != n_new_bonds:
raise ValueError("bond_orders length must match bond_pairs length")
if bond_types is not None and len(bond_types) != n_new_bonds:
raise ValueError("bond_types length must match bond_pairs length")
for prop_name, values in kwargs.items():
if len(values) != n_new_bonds:
raise ValueError(f"{prop_name} length must match bond_pairs length")
# Ensure capacity
self._ensure_capacity(self.n_bonds + n_new_bonds)
# Prepare bond indices
start_idx = self.n_bonds
end_idx = start_idx + n_new_bonds
bond_indices = np.arange(start_idx, end_idx)
# Process bond pairs with consistent ordering
bonds_array = np.array(bond_pairs, dtype=np.int32)
# Sort each bond pair so smaller index comes first
bonds_array.sort(axis=1)
# Validate bonds
if np.any(bonds_array[:, 0] == bonds_array[:, 1]):
raise ValueError("Cannot create bonds between same atoms")
if np.any(bonds_array < 0):
raise ValueError("Atom indices must be non-negative")
# Store bonds
self.bonds[start_idx:end_idx] = bonds_array
# Store bond orders
if bond_orders is not None:
self.bond_order[start_idx:end_idx] = bond_orders
else:
self.bond_order[start_idx:end_idx] = 1.0
# Store bond types
if bond_types is not None:
self.bond_type[start_idx:end_idx] = bond_types
else:
self.bond_type[start_idx:end_idx] = "covalent"
# Store optional properties
for prop_name, values in kwargs.items():
if hasattr(self, prop_name):
prop_array = getattr(self, prop_name)
if prop_array is not None:
prop_array[start_idx:end_idx] = values
self.n_bonds += n_new_bonds
return bond_indices # type: ignore
[docs]
def remove_bonds(
self, bond_indices: Union[int, List[int], np.ndarray[Any, Any]]
) -> None:
"""
Remove bonds by index.
Args:
bond_indices: Bond index or array of bond indices to remove
"""
if isinstance(bond_indices, int):
bond_indices = [bond_indices]
elif isinstance(bond_indices, np.ndarray):
bond_indices = bond_indices.tolist()
# Validate indices
bond_indices = [idx for idx in bond_indices if 0 <= idx < self.n_bonds] # type: ignore
if not bond_indices:
return
# Sort in descending order for safe removal
bond_indices = sorted(set(bond_indices), reverse=True)
# Remove bonds by shifting arrays
for idx in bond_indices:
# Shift remaining bonds down
self.bonds[idx : self.n_bonds - 1] = self.bonds[idx + 1 : self.n_bonds]
self.bond_order[idx : self.n_bonds - 1] = self.bond_order[
idx + 1 : self.n_bonds
]
self.bond_type[idx : self.n_bonds - 1] = self.bond_type[
idx + 1 : self.n_bonds
]
# Shift optional properties
for attr_name in [
"detection_method",
"confidence",
"distance",
"is_hydrogen_bond",
"is_halogen_bond",
]:
attr_value = getattr(self, attr_name)
if attr_value is not None:
attr_value[idx : self.n_bonds - 1] = attr_value[
idx + 1 : self.n_bonds
]
# Shift custom properties
for prop_name in self._custom_properties:
prop_array = getattr(self, prop_name)
prop_array[idx : self.n_bonds - 1] = prop_array[idx + 1 : self.n_bonds]
self.n_bonds -= 1
[docs]
def get_bonds_for_atom(self, atom_index: int) -> np.ndarray:
"""
Get all bonds involving a specific atom.
Args:
atom_index: Index of the atom
Returns:
Array of bond indices involving the atom
"""
if self.n_bonds == 0:
return np.array([], dtype=np.int32) # type: ignore
active_bonds = self.bonds[: self.n_bonds]
mask = (active_bonds[:, 0] == atom_index) | (active_bonds[:, 1] == atom_index)
return np.where(mask)[0] # type: ignore
[docs]
def get_neighbors(self, atom_index: int) -> np.ndarray:
"""
Get neighbor atoms for a specific atom.
Args:
atom_index: Index of the atom
Returns:
Array of neighbor atom indices
"""
bond_indices = self.get_bonds_for_atom(atom_index)
if len(bond_indices) == 0:
return np.array([], dtype=np.int32) # type: ignore
bonds = self.bonds[bond_indices]
# Get the other atom in each bond
neighbors = np.where(bonds[:, 0] == atom_index, bonds[:, 1], bonds[:, 0])
return neighbors # type: ignore
[docs]
def filter_by_atoms(self, atom_indices: Union[List[int], np.ndarray]) -> "BondList":
"""
Create new BondList containing only bonds between specified atoms.
Args:
atom_indices: List or array of atom indices to keep
Returns:
New BondList with filtered bonds and remapped indices
"""
atom_set = set(atom_indices)
# Find bonds where both atoms are in the selection
if self.n_bonds == 0:
return BondList(0)
active_bonds = self.bonds[: self.n_bonds]
mask = np.array([both_atoms_in_set(bond, atom_set) for bond in active_bonds])
if not np.any(mask):
return BondList(0)
# Create index mapping
index_mapping = {
old_idx: new_idx for new_idx, old_idx in enumerate(sorted(atom_indices))
}
# Create new BondList
selected_indices = np.where(mask)[0]
new_bond_list = BondList(len(selected_indices))
# Copy bonds with remapped indices
for new_idx, old_idx in enumerate(selected_indices):
old_bond = active_bonds[old_idx]
new_bond = [index_mapping[old_bond[0]], index_mapping[old_bond[1]]]
new_bond_list.bonds[new_idx] = new_bond
new_bond_list.bond_order[new_idx] = self.bond_order[old_idx]
new_bond_list.bond_type[new_idx] = self.bond_type[old_idx]
# Copy optional properties
for attr_name in [
"detection_method",
"confidence",
"distance",
"is_hydrogen_bond",
"is_halogen_bond",
]:
old_attr = getattr(self, attr_name)
if old_attr is not None:
new_attr = new_bond_list._ensure_property(attr_name, old_attr.dtype)
new_attr[new_idx] = old_attr[old_idx]
# Copy custom properties
for prop_name in self._custom_properties:
old_prop = getattr(self, prop_name)
if not hasattr(new_bond_list, prop_name):
new_bond_list.add_property(prop_name, old_prop.dtype)
new_prop = getattr(new_bond_list, prop_name)
new_prop[new_idx] = old_prop[old_idx]
new_bond_list._custom_properties = self._custom_properties.copy()
return new_bond_list
[docs]
def get_bond_matrix(self, n_atoms: int) -> np.ndarray:
"""
Create bond adjacency matrix.
Args:
n_atoms: Total number of atoms in structure
Returns:
(n_atoms, n_atoms) boolean adjacency matrix
"""
matrix = np.zeros((n_atoms, n_atoms), dtype=bool)
if self.n_bonds > 0:
active_bonds = self.bonds[: self.n_bonds]
valid_mask = np.all(active_bonds < n_atoms, axis=1)
valid_bonds = active_bonds[valid_mask]
matrix[valid_bonds[:, 0], valid_bonds[:, 1]] = True
matrix[valid_bonds[:, 1], valid_bonds[:, 0]] = True
return matrix # type: ignore
[docs]
def validate_bonds(self, n_atoms: int) -> Tuple[bool, List[int]]:
"""
Validate that all bonds reference valid atom indices.
Args:
n_atoms: Number of atoms in the structure
Returns:
Tuple of (all_valid, list_of_invalid_bond_indices)
"""
if self.n_bonds == 0:
return True, []
active_bonds = self.bonds[: self.n_bonds]
invalid_mask = np.any((active_bonds < 0) | (active_bonds >= n_atoms), axis=1)
invalid_indices = np.where(invalid_mask)[0].tolist()
return len(invalid_indices) == 0, invalid_indices
[docs]
def __len__(self) -> int:
"""Return number of bonds."""
return self.n_bonds
[docs]
def __getitem__(
self, index: Union[int, slice, np.ndarray]
) -> Union[Tuple[int, int], "BondList"]:
"""
Get bond(s) by index.
Args:
index: Integer, slice, or array for indexing
Returns:
Single bond tuple or new BondList with selected bonds
"""
if isinstance(index, int):
if index < 0 or index >= self.n_bonds:
raise IndexError("Bond index out of range")
return tuple(self.bonds[index])
# Handle slice or array indexing
if isinstance(index, slice):
indices = list(range(*index.indices(self.n_bonds)))
else:
indices = list(index)
# Create new BondList with selected bonds
new_bond_list = BondList(len(indices))
for new_idx, old_idx in enumerate(indices):
new_bond_list.bonds[new_idx] = self.bonds[old_idx]
new_bond_list.bond_order[new_idx] = self.bond_order[old_idx]
new_bond_list.bond_type[new_idx] = self.bond_type[old_idx]
# Copy optional properties
for attr_name in [
"detection_method",
"confidence",
"distance",
"is_hydrogen_bond",
"is_halogen_bond",
]:
old_attr = getattr(self, attr_name)
if old_attr is not None:
new_attr = new_bond_list._ensure_property(attr_name, old_attr.dtype)
new_attr[new_idx] = old_attr[old_idx]
# Copy custom properties
for prop_name in self._custom_properties:
old_prop = getattr(self, prop_name)
if not hasattr(new_bond_list, prop_name):
new_bond_list.add_property(prop_name, old_prop.dtype)
new_prop = getattr(new_bond_list, prop_name)
new_prop[new_idx] = old_prop[old_idx]
new_bond_list._custom_properties = self._custom_properties.copy()
return new_bond_list
[docs]
def __repr__(self) -> str:
"""String representation of BondList."""
return f"BondList(n_bonds={self.n_bonds})"
[docs]
def __str__(self) -> str:
"""Detailed string representation."""
lines = [f"BondList with {self.n_bonds} bonds"]
if self.n_bonds > 0:
# Show bond type distribution
active_types = self.bond_type[: self.n_bonds]
unique_types, counts = np.unique(active_types, return_counts=True)
type_info = ", ".join(
[
f"{count} {bond_type}"
for bond_type, count in zip(unique_types, counts)
]
)
lines.append(f"Bond types: {type_info}")
# Show bond order distribution
active_orders = self.bond_order[: self.n_bonds]
unique_orders, counts = np.unique(active_orders, return_counts=True)
order_info = ", ".join(
[
f"{count} order-{order:.1f}"
for order, count in zip(unique_orders, counts)
]
)
lines.append(f"Bond orders: {order_info}")
# Show optional properties status
optional_count = sum(
1
for attr in [
"detection_method",
"confidence",
"distance",
"is_hydrogen_bond",
"is_halogen_bond",
]
if getattr(self, attr) is not None
)
custom_count = len(self._custom_properties)
lines.append(
f"Properties: {optional_count} optional, {custom_count} custom"
)
return "\n".join(lines)
def both_atoms_in_set(bond: np.ndarray, atom_set: set) -> bool:
"""
Check if both atoms in a bond are in the given set.
Args:
bond: Array with two atom indices
atom_set: Set of atom indices
Returns:
True if both atoms are in the set
"""
return bond[0] in atom_set and bond[1] in atom_set