"""
Structure class for NumPy-based molecular structure representation.
This module provides the Structure class,
for efficient storage and manipulation of molecular structures using NumPy arrays.
"""
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
import numpy as np
try:
from scipy.spatial import KDTree
HAS_SCIPY = True
except ImportError:
KDTree = None
HAS_SCIPY = False
from ..constants.atomic_data import AtomicData
from ..constants.pdb_constants import (
DNA_RESIDUES,
DNA_RNA_BACKBONE_ATOMS,
DNA_RNA_BASE_ATOMS,
PROTEIN_BACKBONE_ATOMS,
PROTEIN_RESIDUES,
PROTEIN_SIDECHAIN_ATOMS,
RESIDUES_WITH_AROMATIC_RINGS,
RNA_RESIDUES,
WATER_MOLECULES,
)
if TYPE_CHECKING:
from .bond_list import BondList
[docs]
class Structure:
"""
A NumPy-based molecular structure representation using Structure of Arrays (SoA) design.
This class stores molecular data in separate NumPy arrays
for efficient vectorized operations and memory usage. Supports optional annotations
with lazy initialization to save memory when not needed.
Core annotations (always present):
- coord: Atomic coordinates (x, y, z) as float64
- atom_name: PDB atom names as U4 strings
- element: Element symbols as U2 strings
- res_name: Residue names as U3 strings
- res_id: Residue sequence numbers as int32
- chain_id: Chain identifiers as U1 strings
Optional annotations (lazy initialization):
- alt_loc: Alternate location indicators
- occupancy: Occupancy values
- b_factor: Temperature factors
- charge: Formal charges
- serial: Atom serial numbers
- insertion_code: Residue insertion codes
- segment_id: Segment identifiers
Classification flags (computed on demand):
- is_backbone: Boolean array for backbone atoms
- is_sidechain: Boolean array for sidechain atoms
- is_aromatic: Boolean array for aromatic atoms
- is_ligand: Boolean array for ligand atoms
- residue_type: Residue type classification
Example:
>>> structure = Structure(n_atoms=100)
>>> structure.coord = np.random.rand(100, 3)
>>> structure.atom_name[:] = "CA"
>>> structure.add_annotation("custom_prop", dtype=np.float32, default_value=1.0)
"""
[docs]
def __init__(self, n_atoms: int):
"""
Initialize Structure with core annotations only.
Args:
n_atoms: Number of atoms in the structure
Raises:
ValueError: If n_atoms <= 0
"""
if n_atoms <= 0:
raise ValueError("Number of atoms must be positive")
self.n_atoms = n_atoms
# Core annotations - always present
self._coord = np.zeros((n_atoms, 3), dtype=np.float64)
self.atom_name = np.empty(n_atoms, dtype="U4")
self.element = np.empty(n_atoms, dtype="U2")
self.res_name = np.empty(n_atoms, dtype="U3")
self.res_id = np.zeros(n_atoms, dtype=np.int32)
self.chain_id = np.empty(n_atoms, dtype="U1")
# Optional annotations - lazy initialization
self.alt_loc: Optional[np.ndarray] = None
self.occupancy: Optional[np.ndarray] = None
self.b_factor: Optional[np.ndarray] = None
self.charge: Optional[np.ndarray] = None
self.serial: Optional[np.ndarray] = None
self.insertion_code: Optional[np.ndarray] = None
self.segment_id: Optional[np.ndarray] = None
# Classification flags - computed on demand
self._is_backbone: Optional[np.ndarray] = None
self._is_sidechain: Optional[np.ndarray] = None
self._is_aromatic: Optional[np.ndarray] = None
self._is_ligand: Optional[np.ndarray] = None
self._residue_type: Optional[np.ndarray] = None
# Bond storage
self._bonds: Optional["BondList"] = None
self._file_bonds: Optional["BondList"] = None # Bonds from PDB/mmCIF files
# Spatial indexing - lazy initialization
self._spatial_index: Optional["KDTree"] = None
self._coords_hash: Optional[int] = None
# Track custom annotations
self._custom_annotations: set = set()
@property
def coord(self) -> np.ndarray:
"""
Atomic coordinates array (n_atoms, 3).
Returns:
NumPy array of atomic coordinates
"""
return self._coord # type: ignore[no-any-return]
@coord.setter
def coord(self, value: np.ndarray) -> None:
"""
Set atomic coordinates and invalidate spatial index.
Args:
value: New coordinate array (n_atoms, 3)
"""
if value.shape != (self.n_atoms, 3):
raise ValueError(
f"Coordinates must have shape ({self.n_atoms}, 3), got {value.shape}"
)
self._coord = value.astype(np.float64)
self._invalidate_spatial_index()
[docs]
def add_annotation(
self, name: str, dtype: Any = np.float32, default_value: Any = None
) -> None:
"""
Add custom annotation to structure.
Args:
name: Name of the annotation
dtype: NumPy data type for the annotation
default_value: Default value to fill the array (optional)
Raises:
ValueError: If annotation name already exists
"""
if hasattr(self, name):
raise ValueError(f"Annotation '{name}' already exists")
array = np.empty(self.n_atoms, dtype=dtype)
if default_value is not None:
array.fill(default_value)
setattr(self, name, array)
self._custom_annotations.add(name)
def _ensure_annotation(
self, annotation_name: str, dtype: Any, default_value: Any = None
) -> np.ndarray:
"""
Ensure optional annotation exists, creating it if necessary.
Args:
annotation_name: Name of the annotation attribute
dtype: NumPy data type
default_value: Default value to fill array
Returns:
The annotation array
"""
annotation = getattr(self, annotation_name)
if annotation is None:
annotation = np.empty(self.n_atoms, dtype=dtype)
if default_value is not None:
annotation.fill(default_value)
setattr(self, annotation_name, annotation)
return annotation # type: ignore
@property
def is_backbone(self) -> np.ndarray:
"""Boolean array indicating backbone atoms."""
if self._is_backbone is None:
self._update_classification_flags()
return self._is_backbone # type: ignore
@property
def is_sidechain(self) -> np.ndarray:
"""Boolean array indicating sidechain atoms."""
if self._is_sidechain is None:
self._update_classification_flags()
return self._is_sidechain # type: ignore
@property
def is_aromatic(self) -> np.ndarray:
"""Boolean array indicating aromatic atoms."""
if self._is_aromatic is None:
self._update_classification_flags()
return self._is_aromatic # type: ignore
@property
def is_ligand(self) -> np.ndarray:
"""Boolean array indicating ligand atoms."""
if self._is_ligand is None:
self._update_classification_flags()
return self._is_ligand # type: ignore
@property
def residue_type(self) -> np.ndarray:
"""String array with residue type classification (PROTEIN/DNA/RNA/LIGAND)."""
if self._residue_type is None:
self._update_classification_flags()
return self._residue_type # type: ignore
def _update_classification_flags(self) -> None:
"""Update all classification flags based on current structure data."""
# Initialize arrays
self._is_backbone = np.zeros(self.n_atoms, dtype=bool)
self._is_sidechain = np.zeros(self.n_atoms, dtype=bool)
self._is_aromatic = np.zeros(self.n_atoms, dtype=bool)
self._is_ligand = np.zeros(self.n_atoms, dtype=bool)
self._residue_type = np.full(self.n_atoms, "LIGAND", dtype="U7")
# Classify residue types first
protein_mask = np.isin(self.res_name, PROTEIN_RESIDUES)
dna_mask = np.isin(self.res_name, DNA_RESIDUES)
rna_mask = np.isin(self.res_name, RNA_RESIDUES)
water_mask = np.isin(self.res_name, WATER_MOLECULES)
self._residue_type[protein_mask] = "PROTEIN"
self._residue_type[dna_mask] = "DNA"
self._residue_type[rna_mask] = "RNA"
# Water is not considered ligand for our purposes
ligand_mask = ~(protein_mask | dna_mask | rna_mask | water_mask)
self._is_ligand[ligand_mask] = True
# Backbone classification
protein_backbone_mask = protein_mask & np.isin(
self.atom_name, PROTEIN_BACKBONE_ATOMS
)
nucleic_backbone_mask = (dna_mask | rna_mask) & np.isin(
self.atom_name, DNA_RNA_BACKBONE_ATOMS
)
self._is_backbone[protein_backbone_mask | nucleic_backbone_mask] = True
# Sidechain classification
protein_sidechain_mask = protein_mask & np.isin(
self.atom_name, PROTEIN_SIDECHAIN_ATOMS
)
nucleic_base_mask = (dna_mask | rna_mask) & np.isin(
self.atom_name, DNA_RNA_BASE_ATOMS
)
self._is_sidechain[protein_sidechain_mask | nucleic_base_mask] = True
# Aromatic classification - atoms in aromatic residues
aromatic_residue_mask = np.isin(self.res_name, RESIDUES_WITH_AROMATIC_RINGS)
self._is_aromatic[aromatic_residue_mask] = True
[docs]
def __getitem__(self, index: Union[int, slice, np.ndarray]) -> "Structure":
"""
Get subset of structure by index.
Args:
index: Integer, slice, or boolean/integer array for indexing
Returns:
New Structure containing selected atoms
Example:
>>> subset = structure[structure.element == "C"]
>>> single_atom = structure[0]
>>> chain_a = structure[structure.chain_id == "A"]
"""
if isinstance(index, int):
index = slice(index, index + 1)
elif isinstance(index, np.ndarray) and index.dtype == bool:
index = np.where(index)[0]
# Determine number of atoms in subset
if isinstance(index, slice):
n_subset = len(range(*index.indices(self.n_atoms)))
else:
n_subset = len(index) # type: ignore
# Create new structure
subset = Structure(n_subset)
# Copy core annotations
subset.coord = self.coord[index].copy()
subset.atom_name = self.atom_name[index].copy()
subset.element = self.element[index].copy()
subset.res_name = self.res_name[index].copy()
subset.res_id = self.res_id[index].copy()
subset.chain_id = self.chain_id[index].copy()
# Copy optional annotations if they exist
for attr_name in [
"alt_loc",
"occupancy",
"b_factor",
"charge",
"serial",
"insertion_code",
"segment_id",
]:
attr_value = getattr(self, attr_name)
if attr_value is not None:
setattr(subset, attr_name, attr_value[index].copy())
# Copy custom annotations
for annotation_name in self._custom_annotations:
annotation = getattr(self, annotation_name)
setattr(subset, annotation_name, annotation[index].copy())
subset._custom_annotations.add(annotation_name)
return subset
[docs]
def __len__(self) -> int:
"""Return number of atoms in structure."""
return self.n_atoms
[docs]
def copy(self) -> "Structure":
"""
Create deep copy of structure.
Returns:
New Structure with copied data
"""
return self[:]
[docs]
def get_center(self, weights: Optional[np.ndarray] = None) -> np.ndarray:
"""
Calculate geometric or mass-weighted center of structure.
Args:
weights: Optional weights for each atom (e.g., atomic masses)
Returns:
3D coordinate of center as numpy array
"""
if weights is None:
return np.mean(self.coord, axis=0) # type: ignore
else:
if len(weights) != self.n_atoms:
raise ValueError(
"Weights array must have same length as number of atoms"
)
return np.average(self.coord, axis=0, weights=weights) # type: ignore
[docs]
def get_masses(self) -> np.ndarray:
"""
Get atomic masses for all atoms.
Returns:
Array of atomic masses in amu
"""
masses = np.zeros(self.n_atoms)
for i, element in enumerate(self.element):
masses[i] = AtomicData.ATOMIC_MASSES.get(
element.upper(), AtomicData.DEFAULT_ATOMIC_MASS
)
return masses # type: ignore
[docs]
def translate(self, vector: np.ndarray) -> None:
"""
Translate structure by given vector.
Args:
vector: 3D translation vector
"""
if len(vector) != 3:
raise ValueError("Translation vector must be 3D")
self.coord += vector
self._invalidate_spatial_index()
[docs]
def center_at_origin(self, weights: Optional[np.ndarray] = None) -> None:
"""
Center structure at origin.
Args:
weights: Optional weights for center calculation
"""
center = self.get_center(weights)
self.translate(-center)
def _ensure_spatial_index(self) -> "KDTree":
"""
Ensure spatial index is built and up-to-date.
Returns:
KDTree spatial index for this structure
Raises:
ImportError: If scipy is not available
"""
if not HAS_SCIPY:
raise ImportError(
"scipy is required for spatial indexing. Install with: pip install scipy"
)
# Calculate hash of current coordinates
current_hash = hash(self.coord.tobytes())
# Build/rebuild index if needed
if self._spatial_index is None or self._coords_hash != current_hash:
self._spatial_index = KDTree(self.coord)
self._coords_hash = current_hash
return self._spatial_index
def _invalidate_spatial_index(self) -> None:
"""Invalidate spatial index after coordinate changes."""
self._spatial_index = None
self._coords_hash = None
[docs]
def get_neighbors_within(self, atom_idx: int, radius: float) -> np.ndarray:
"""
Get atom indices within radius of specified atom.
Args:
atom_idx: Index of query atom
radius: Search radius in Angstroms
Returns:
Array of neighbor atom indices (excluding query atom)
Example:
>>> neighbors = structure.get_neighbors_within(100, 5.0)
"""
if atom_idx < 0 or atom_idx >= self.n_atoms:
raise IndexError(f"Atom index {atom_idx} out of range")
spatial_index = self._ensure_spatial_index()
query_point = self.coord[atom_idx]
neighbor_indices = spatial_index.query_ball_point(query_point, radius)
# Remove query atom from results
return np.array([idx for idx in neighbor_indices if idx != atom_idx]) # type: ignore[no-any-return]
[docs]
def get_atoms_within_sphere(self, center: np.ndarray, radius: float) -> np.ndarray:
"""
Get atoms within spherical region.
Args:
center: Center point of sphere (x, y, z)
radius: Radius of sphere in Angstroms
Returns:
Array of atom indices within the sphere
Example:
>>> center = np.array([10.0, 15.0, 20.0])
>>> atoms = structure.get_atoms_within_sphere(center, 8.0)
"""
if len(center) != 3:
raise ValueError("Center must be 3D coordinate")
spatial_index = self._ensure_spatial_index()
neighbor_indices = spatial_index.query_ball_point(center, radius)
return np.array(neighbor_indices) # type: ignore[no-any-return]
[docs]
def get_atoms_within_cog_sphere(
self, selection: np.ndarray, radius: float
) -> np.ndarray:
"""
Get atoms within spherical zone centered at center of geometry of selection.
Args:
selection: Boolean mask or indices of atoms to define COG
radius: Radius of spherical zone in Angstroms
Returns:
Array of atom indices within the COG sphere
Example:
>>> active_site = structure.select("resname HIS")
>>> nearby = structure.get_atoms_within_cog_sphere(active_site, 10.0)
"""
if len(selection) == 0:
return np.array([]) # type: ignore[no-any-return]
# Handle both boolean masks and index arrays
if selection.dtype == bool:
if len(selection) != self.n_atoms:
raise ValueError("Boolean selection must match number of atoms")
selected_coords = self.coord[selection]
else:
# Assume it's an array of indices
selected_coords = self.coord[selection]
if len(selected_coords) == 0:
return np.array([]) # type: ignore[no-any-return]
# Calculate center of geometry
cog = np.mean(selected_coords, axis=0)
# Find atoms within sphere around COG
return self.get_atoms_within_sphere(cog, radius)
[docs]
def get_neighbors_for_atoms(
self, atom_indices: np.ndarray, radius: float
) -> Dict[int, np.ndarray]:
"""
Get neighbors for multiple atoms at once (batch operation).
Args:
atom_indices: Array of atom indices to query
radius: Search radius in Angstroms
Returns:
Dictionary mapping atom_idx -> array of neighbor indices
Example:
>>> ca_atoms = structure.select("name CA")
>>> neighbors = structure.get_neighbors_for_atoms(ca_atoms, 8.0)
"""
spatial_index = self._ensure_spatial_index()
query_points = self.coord[atom_indices]
neighbor_lists = spatial_index.query_ball_point(query_points, radius)
result = {}
for i, atom_idx in enumerate(atom_indices):
# Remove query atom from neighbors
neighbors = [idx for idx in neighbor_lists[i] if idx != atom_idx]
result[atom_idx] = np.array(neighbors)
return result
[docs]
def get_closest_atoms(
self, query_point: np.ndarray, k: int = 1
) -> Tuple[np.ndarray, np.ndarray]:
"""
Get k nearest atoms to a query point.
Args:
query_point: 3D coordinate to query
k: Number of nearest neighbors to return
Returns:
Tuple of (distances, atom_indices) for k nearest atoms
Example:
>>> center = np.array([0.0, 0.0, 0.0])
>>> distances, indices = structure.get_closest_atoms(center, k=5)
"""
if len(query_point) != 3:
raise ValueError("Query point must be 3D coordinate")
spatial_index = self._ensure_spatial_index()
distances, indices = spatial_index.query(query_point, k=min(k, self.n_atoms))
# Ensure we return arrays even for k=1
if k == 1:
distances = np.array([distances])
indices = np.array([indices])
return distances, indices
[docs]
def get_atoms_between_selections(
self, selection1: np.ndarray, selection2: np.ndarray, max_distance: float
) -> Dict[str, np.ndarray]:
"""
Find atoms from two selections within max_distance of each other.
Args:
selection1: First selection (boolean mask or indices)
selection2: Second selection (boolean mask or indices)
max_distance: Maximum distance between selections
Returns:
Dictionary with 'selection1_atoms', 'selection2_atoms', 'distances'
Example:
>>> protein = structure.select("protein")
>>> ligand = structure.select("resname LIG")
>>> contacts = structure.get_atoms_between_selections(protein, ligand, 5.0)
"""
# Convert selections to index arrays if needed
if selection1.dtype == bool:
sel1_indices = np.where(selection1)[0]
else:
sel1_indices = selection1
if selection2.dtype == bool:
sel2_indices = np.where(selection2)[0]
else:
sel2_indices = selection2
spatial_index = self._ensure_spatial_index()
# Find all pairs within max_distance
sel1_contacts = []
sel2_contacts = []
distances = []
for atom1_idx in sel1_indices:
query_point = self.coord[atom1_idx]
neighbors = spatial_index.query_ball_point(query_point, max_distance)
for neighbor_idx in neighbors:
if neighbor_idx in sel2_indices:
distance = np.linalg.norm(
self.coord[atom1_idx] - self.coord[neighbor_idx]
)
sel1_contacts.append(atom1_idx)
sel2_contacts.append(neighbor_idx)
distances.append(distance)
return {
"selection1_atoms": np.array(sel1_contacts),
"selection2_atoms": np.array(sel2_contacts),
"distances": np.array(distances),
}
[docs]
def has_spatial_index(self) -> bool:
"""
Check if spatial index is available.
Returns:
True if scipy is available and spatial indexing is possible
"""
return HAS_SCIPY
[docs]
def get_bonds_to(
self, other_atoms: np.ndarray, max_distance: float = 2.0
) -> np.ndarray:
"""
Find potential bonds to other atoms based on distance.
Args:
other_atoms: Indices of other atoms to check
max_distance: Maximum distance for bond consideration
Returns:
Boolean array indicating which atoms have potential bonds
"""
# Simple distance-based bond detection
bonds = np.zeros(self.n_atoms, dtype=bool)
for other_idx in other_atoms:
distances = np.linalg.norm(self.coord - self.coord[other_idx], axis=1)
# Exclude self-bonds
distances[other_idx] = np.inf
bonds |= distances <= max_distance
return bonds # type: ignore
[docs]
def select(self, selection_string: str) -> np.ndarray:
"""
Select atoms using selection language.
Args:
selection_string: Selection expression
Returns:
Boolean array of selected atoms
Examples:
>>> mask = structure.select("protein and backbone")
>>> mask = structure.select("resname ALA GLY")
>>> mask = structure.select("chain A and resid 1:50")
Raises:
NotImplementedError: For unsupported selection syntax
"""
from pyparsing import ParseException
from ..selection import select
try:
return select(self, selection_string)
except ParseException as e:
# Convert parser errors to NotImplementedError for unsupported selections
raise NotImplementedError(f"Unsupported selection syntax: {e}") from e
[docs]
def get_annotation_info(self) -> Dict[str, Dict[str, Any]]:
"""
Get information about all annotations in the structure.
Returns:
Dictionary with annotation info including dtype and whether it's initialized
"""
info = {}
# Core annotations
core_annotations = [
"coord",
"atom_name",
"element",
"res_name",
"res_id",
"chain_id",
]
for name in core_annotations:
attr = getattr(self, name)
info[name] = {
"type": "core",
"dtype": attr.dtype,
"shape": attr.shape,
"initialized": True,
}
# Optional annotations
optional_annotations = [
"alt_loc",
"occupancy",
"b_factor",
"charge",
"serial",
"insertion_code",
"segment_id",
]
for name in optional_annotations:
attr = getattr(self, name)
info[name] = {
"type": "optional",
"dtype": attr.dtype if attr is not None else None,
"shape": attr.shape if attr is not None else None,
"initialized": attr is not None,
}
# Custom annotations
for name in self._custom_annotations:
attr = getattr(self, name)
info[name] = {
"type": "custom",
"dtype": attr.dtype,
"shape": attr.shape,
"initialized": True,
}
return info
[docs]
def __repr__(self) -> str:
"""String representation of Structure."""
return f"Structure(n_atoms={self.n_atoms})"
[docs]
def __str__(self) -> str:
"""Detailed string representation."""
lines = [f"Structure with {self.n_atoms} atoms"]
# Show residue type distribution
if self._residue_type is not None:
unique_types, counts = np.unique(self.residue_type, return_counts=True)
type_info = ", ".join(
[f"{count} {type_}" for type_, count in zip(unique_types, counts)]
)
lines.append(f"Residue types: {type_info}")
# Show chain distribution
unique_chains, counts = np.unique(self.chain_id, return_counts=True)
if len(unique_chains) > 1:
chain_info = ", ".join(
[
f"{count} in chain {chain}"
for chain, count in zip(unique_chains, counts)
]
)
lines.append(f"Chains: {chain_info}")
# Show annotation status
annotation_info = self.get_annotation_info()
optional_initialized = sum(
1
for info in annotation_info.values()
if info["type"] == "optional" and info["initialized"]
)
custom_count = len(self._custom_annotations)
lines.append(
f"Annotations: {optional_initialized} optional, {custom_count} custom"
)
return "\n".join(lines)
[docs]
@classmethod
def from_pdb(cls, filename: str) -> "Structure":
"""
Create Structure from PDB file.
Args:
filename: Path to PDB file
Returns:
Structure object with all atoms and annotations
Raises:
ValueError: If PDB file contains multiple models
Example:
>>> structure = Structure.from_pdb("example.pdb")
>>> print(f"Loaded {structure.n_atoms} atoms")
"""
from ..io.pdb import PDBParser
parser = PDBParser()
result = parser.parse_file(filename)
if isinstance(result, cls):
return result
else:
raise ValueError(
"PDB file contains multiple models, use StructureEnsemble.from_pdb() instead"
)
[docs]
@classmethod
def from_mmcif(cls, filename: str) -> "Structure":
"""
Create Structure from mmCIF file.
Args:
filename: Path to mmCIF file
Returns:
Structure object with all atoms and annotations
Raises:
ValueError: If mmCIF file contains multiple models
Example:
>>> structure = Structure.from_mmcif("example.cif")
>>> print(f"Loaded {structure.n_atoms} atoms")
"""
from ..io.mmcif import mmCIFParser
parser = mmCIFParser()
result = parser.parse_file(filename)
if isinstance(result, cls):
return result
else:
raise ValueError(
"mmCIF file contains multiple models, use StructureEnsemble.from_mmcif() instead"
)
[docs]
@classmethod
def from_pdb_string(cls, pdb_content: str) -> "Structure":
"""
Create Structure from PDB content string.
Args:
pdb_content: PDB file content as string
Returns:
Structure object with all atoms and annotations
Raises:
ValueError: If PDB content contains multiple models
Example:
>>> pdb_data = "ATOM 1 N ALA A 1 20.154 16.967 22.478 1.00 10.00 N"
>>> structure = Structure.from_pdb_string(pdb_data)
"""
from ..io.pdb import PDBParser
parser = PDBParser()
result = parser.parse_string(pdb_content)
if isinstance(result, cls):
return result
else:
raise ValueError(
"PDB content contains multiple models, use StructureEnsemble.from_pdb_string() instead"
)
[docs]
@classmethod
def from_mmcif_string(cls, mmcif_content: str) -> "Structure":
"""
Create Structure from mmCIF content string.
Args:
mmcif_content: mmCIF file content as string
Returns:
Structure object with all atoms and annotations
Raises:
ValueError: If mmCIF content contains multiple models
Example:
>>> mmcif_data = "data_test\\nloop_\\n_atom_site.group_PDB\\n..."
>>> structure = Structure.from_mmcif_string(mmcif_data)
"""
from ..io.mmcif import mmCIFParser
parser = mmCIFParser()
result = parser.parse_string(mmcif_content)
if isinstance(result, cls):
return result
else:
raise ValueError(
"mmCIF content contains multiple models, use StructureEnsemble.from_mmcif_string() instead"
)
[docs]
def detect_bonds(
self,
vdw_factor: float = 0.75,
use_file_bonds: bool = True,
store_bonds: bool = True,
) -> "BondList":
"""
Detect bonds using the simplified default detector.
Args:
vdw_factor: Factor for VdW radii in distance detection (0.0 < factor <= 1.0)
use_file_bonds: Whether to include file-based bonds (CONECT, mmCIF)
store_bonds: Whether to store detected bonds on structure
Returns:
BondList with detected bonds
Example:
>>> structure = Structure.from_pdb("protein.pdb")
>>> bonds = structure.detect_bonds()
>>> print(f"Detected {len(bonds)} bonds")
"""
from ..bond_detection import DefaultBondDetector
detector = DefaultBondDetector(vdw_factor=vdw_factor)
bonds = detector.detect_bonds(self, use_file_bonds=use_file_bonds)
if store_bonds:
self._bonds = bonds
return bonds
@property
def bonds(self) -> Optional["BondList"]:
"""
Get bonds associated with this structure.
Returns:
BondList if bonds have been detected/assigned, None otherwise
"""
return getattr(self, "_bonds", None)
@bonds.setter
def bonds(self, bond_list: "BondList") -> None:
"""
Set bonds for this structure.
Args:
bond_list: BondList to associate with structure
"""
self._bonds = bond_list
[docs]
def has_bonds(self) -> bool:
"""
Check if structure has bond information.
Returns:
True if bonds are available
"""
bonds = self.bonds
return bonds is not None and len(bonds) > 0
@property
def file_bonds(self) -> Optional["BondList"]:
"""
Get bonds loaded from structure files (PDB CONECT, mmCIF bonds).
Returns:
BondList with file-based bonds or None if not available
"""
return self._file_bonds
@file_bonds.setter
def file_bonds(self, bond_list: "BondList") -> None:
"""
Set file-based bonds for this structure.
Args:
bond_list: BondList with file-based bonds
"""
self._file_bonds = bond_list
[docs]
def has_file_bonds(self) -> bool:
"""
Check if structure has file-based bond information.
Returns:
True if file bonds are available
"""
file_bonds = self.file_bonds
return file_bonds is not None and len(file_bonds) > 0