Examples#

This section provides comprehensive examples demonstrating MolR’s capabilities for various molecular analysis tasks.

Basic Structure Analysis#

Loading and Inspecting Structures#

import molr
import numpy as np

# Load a protein structure
structure = molr.Structure.from_pdb("1crn.pdb")

print(f"Structure has {structure.n_atoms} atoms")
print(f"Chains present: {np.unique(structure.chain_id)}")
print(f"Residue types: {np.unique(structure.res_name)}")

# Get basic structural information
center = structure.get_center()
print(f"Geometric center: {center}")

# Coordinate statistics
coord_range = np.ptp(structure.coord, axis=0)
print(f"Structure dimensions: {coord_range}")

Atom and Residue Statistics#

# Count atoms by element
elements, counts = np.unique(structure.element, return_counts=True)
for element, count in zip(elements, counts):
    print(f"{element}: {count} atoms")

# Residue composition
residues, res_counts = np.unique(structure.res_name, return_counts=True)
for residue, count in zip(residues, res_counts):
    print(f"{residue}: {count} residues")

# Chain analysis
for chain in np.unique(structure.chain_id):
    chain_mask = structure.chain_id == chain
    n_residues = len(np.unique(structure.res_id[chain_mask]))
    n_atoms = np.sum(chain_mask)
    print(f"Chain {chain}: {n_residues} residues, {n_atoms} atoms")

Bond Detection and Analysis#

Comprehensive Bond Detection#

from molr.bond_detection import DefaultBondDetector

# Load structure
structure = molr.Structure.from_pdb("2ptc.pdb")

# Configure bond detector
detector = DefaultBondDetector()

# Detect bonds
bonds = detector.detect_bonds(structure)

print(f"Total bonds detected: {len(bonds)}")

Bond Analysis by Residue Type#

# Analyze bonds by residue type
bond_stats = {}

for i in range(len(bonds)):
    atom1_idx, atom2_idx = bonds.get_bond(i)

    res1 = structure.res_name[atom1_idx]
    res2 = structure.res_name[atom2_idx]

    # Intra-residue bonds
    if res1 == res2:
        key = f"intra-{res1}"
    else:
        # Inter-residue bonds
        key = f"inter-{res1}-{res2}"

    bond_stats[key] = bond_stats.get(key, 0) + 1

# Display top bond types
sorted_bonds = sorted(bond_stats.items(), key=lambda x: x[1], reverse=True)
print("Top bond types:")
for bond_type, count in sorted_bonds[:10]:
    print(f"  {bond_type}: {count}")

Protein-Ligand Interaction Analysis#

Binding Site Identification#

# Load protein-ligand complex
structure = molr.Structure.from_pdb("2ptc.pdb")

# Identify ligand and protein
ligand_mask = structure.select("not protein and not water")
protein_mask = structure.select("protein")

if np.any(ligand_mask):
    ligand_residues = np.unique(structure.res_name[ligand_mask])
    print(f"Ligand residues: {ligand_residues}")

    # Find binding site residues
    binding_site_mask = structure.select(
        "protein and within 5.0 of (not protein and not water)"
    )

    # Get unique binding site residues
    binding_residues = []
    for res_id in np.unique(structure.res_id[binding_site_mask]):
        res_mask = structure.res_id == res_id
        res_name = structure.res_name[res_mask][0]
        chain = structure.chain_id[res_mask][0]
        binding_residues.append(f"{res_name}{res_id}{chain}")

    print(f"Binding site residues ({len(binding_residues)}):")
    for residue in binding_residues:
        print(f"  {residue}")

Contact Analysis#

# Detailed contact analysis
contacts = structure.get_atoms_between_selections(
    protein_mask, ligand_mask, max_distance=4.0
)

print(f"Found {len(contacts)} protein-ligand contacts")

# Analyze contact types
contact_types = {}

for protein_idx, ligand_idx in contacts:
    protein_atom = structure.atom_name[protein_idx]
    protein_res = structure.res_name[protein_idx]
    ligand_atom = structure.atom_name[ligand_idx]
    ligand_res = structure.res_name[ligand_idx]

    contact_key = f"{protein_res}:{protein_atom} - {ligand_res}:{ligand_atom}"
    contact_types[contact_key] = contact_types.get(contact_key, 0) + 1

# Display most common contacts
sorted_contacts = sorted(contact_types.items(), key=lambda x: x[1], reverse=True)
print("Most common contacts:")
for contact, count in sorted_contacts[:10]:
    print(f"  {contact}: {count}")

Advanced Selection Examples#

Complex Spatial Selections#

# Load structure
structure = molr.Structure.from_pdb("4hhb.pdb")  # Hemoglobin

# Find heme groups
heme_mask = structure.select("resname HEM")
if np.any(heme_mask):
    print(f"Found {np.sum(heme_mask)} heme atoms")

    # Find residues coordinating iron
    iron_mask = structure.select("resname HEM and name FE")
    if np.any(iron_mask):
        # Get iron coordinates
        iron_indices = np.where(iron_mask)[0]

        for iron_idx in iron_indices:
            # Find coordinating residues
            coord_mask = structure.select(
                f"protein and within 3.0 of index {iron_idx}"
            )

            coord_residues = []
            for res_id in np.unique(structure.res_id[coord_mask]):
                res_mask = structure.res_id == res_id
                res_name = structure.res_name[res_mask][0]
                chain = structure.chain_id[res_mask][0]
                coord_residues.append(f"{res_name}{res_id}{chain}")

            print(f"Iron {iron_idx} coordinated by: {coord_residues}")

Multi-Chain Analysis#

# Analyze each chain separately
chains = np.unique(structure.chain_id)

for chain in chains:
    chain_mask = structure.select(f"chain {chain}")
    chain_structure = structure[chain_mask]

    # Chain composition
    residue_count = len(np.unique(chain_structure.res_id))
    atom_count = chain_structure.n_atoms

    print(f"Chain {chain}: {residue_count} residues, {atom_count} atoms")

    # Secondary structure elements (simplified)
    backbone_mask = chain_structure.select("backbone")
    if np.any(backbone_mask):
        backbone_atoms = chain_structure[backbone_mask]
        # Could add secondary structure analysis here

Interface Analysis#

# Find inter-chain contacts
chain_pairs = []
chains = np.unique(structure.chain_id)

for i, chain1 in enumerate(chains):
    for chain2 in chains[i+1:]:
        chain1_mask = structure.chain_id == chain1
        chain2_mask = structure.chain_id == chain2

        # Find contacts between chains
        contacts = structure.get_atoms_between_selections(
            chain1_mask, chain2_mask, max_distance=4.0
        )

        if len(contacts) > 0:
            print(f"Chain {chain1} - Chain {chain2}: {len(contacts)} contacts")
            chain_pairs.append((chain1, chain2, contacts))

# Analyze interface residues
for chain1, chain2, contacts in chain_pairs:
    interface_residues = set()

    for atom1_idx, atom2_idx in contacts:
        res1_id = structure.res_id[atom1_idx]
        res2_id = structure.res_id[atom2_idx]

        interface_residues.add((chain1, res1_id))
        interface_residues.add((chain2, res2_id))

    print(f"Interface residues ({chain1}-{chain2}): {len(interface_residues)}")

Trajectory Analysis#

Multi-Model Structure Analysis#

# Load multi-model structure
ensemble = molr.StructureEnsemble.from_pdb("1bq0.pdb")

print(f"Loaded {ensemble.n_models} models")
print(f"Each model has {ensemble.n_atoms} atoms")

# Analyze structural variation
centers = []
ca_atoms_list = []

for i, model in enumerate(ensemble):
    center = model.get_center()
    centers.append(center)

    # Get CA atoms for each model
    ca_mask = model.select("name CA")
    ca_coords = model.coord[ca_mask]
    ca_atoms_list.append(ca_coords)

# Calculate center variations
centers = np.array(centers)
center_variation = np.std(centers, axis=0)
print(f"Center variation (x,y,z): {center_variation}")

RMSD Calculation#

# Calculate RMSD between models
if len(ca_atoms_list) > 1:
    reference = ca_atoms_list[0]  # First model as reference

    rmsds = []
    for i, coords in enumerate(ca_atoms_list[1:], 1):
        if coords.shape == reference.shape:
            # Simple RMSD calculation (without alignment)
            diff = coords - reference
            rmsd = np.sqrt(np.mean(np.sum(diff**2, axis=1)))
            rmsds.append(rmsd)
            print(f"Model {i} RMSD to reference: {rmsd:.2f} Å")

    if rmsds:
        print(f"Average RMSD: {np.mean(rmsds):.2f} Å")
        print(f"RMSD range: {np.min(rmsds):.2f} - {np.max(rmsds):.2f} Å")

Structural Flexibility Analysis#

# Analyze per-residue flexibility
if len(ca_atoms_list) > 2:
    ca_array = np.array(ca_atoms_list)  # Shape: (n_models, n_residues, 3)

    # Calculate per-residue RMSF (Root Mean Square Fluctuation)
    mean_positions = np.mean(ca_array, axis=0)
    fluctuations = []

    for i in range(ca_array.shape[1]):  # For each residue
        residue_coords = ca_array[:, i, :]  # All models for this residue
        deviations = residue_coords - mean_positions[i]
        rmsf = np.sqrt(np.mean(np.sum(deviations**2, axis=1)))
        fluctuations.append(rmsf)

    fluctuations = np.array(fluctuations)

    # Find most and least flexible residues
    max_flex_idx = np.argmax(fluctuations)
    min_flex_idx = np.argmin(fluctuations)

    # Get residue information from first model
    first_model = ensemble[0]
    ca_mask = first_model.select("name CA")
    res_ids = first_model.res_id[ca_mask]
    res_names = first_model.res_name[ca_mask]

    print(f"Most flexible residue: {res_names[max_flex_idx]}{res_ids[max_flex_idx]} "
          f"(RMSF: {fluctuations[max_flex_idx]:.2f} Å)")
    print(f"Least flexible residue: {res_names[min_flex_idx]}{res_ids[min_flex_idx]} "
          f"(RMSF: {fluctuations[min_flex_idx]:.2f} Å)")

Hydrogen Bond Analysis#

Simple Hydrogen Bond Detection#

# Load structure with hydrogens
structure = molr.Structure.from_pdb("6rsa.pdb")

# Detect all bonds first
bonds = structure.detect_bonds()

# Find potential hydrogen bonds (simplified criteria)
h_atoms = structure.select("element H")
donors = []

# Find hydrogen bond donors (N-H, O-H)
for h_idx in np.where(h_atoms)[0]:
    h_neighbors = bonds.get_neighbors(h_idx)

    for neighbor_idx in h_neighbors:
        neighbor_element = structure.element[neighbor_idx]
        if neighbor_element in ['N', 'O']:
            donors.append((neighbor_idx, h_idx))

print(f"Found {len(donors)} potential hydrogen bond donors")

# Find acceptors (N, O atoms not bonded to H)
acceptors = []
for atom_idx in range(structure.n_atoms):
    element = structure.element[atom_idx]
    if element in ['N', 'O']:
        neighbors = bonds.get_neighbors(atom_idx)
        neighbor_elements = [structure.element[n] for n in neighbors]

        # Simple criteria: N/O not saturated with H
        h_count = neighbor_elements.count('H')
        if (element == 'N' and h_count < 3) or (element == 'O' and h_count < 2):
            acceptors.append(atom_idx)

print(f"Found {len(acceptors)} potential hydrogen bond acceptors")

Hydrogen Bond Geometry Analysis#

# Analyze hydrogen bond geometry
potential_hbonds = []

for donor_heavy, h_atom in donors[:10]:  # Limit for example
    donor_coord = structure.coord[donor_heavy]
    h_coord = structure.coord[h_atom]

    # Find nearby acceptors
    for acceptor in acceptors:
        acceptor_coord = structure.coord[acceptor]

        # Distance criteria
        da_distance = np.linalg.norm(donor_coord - acceptor_coord)
        ha_distance = np.linalg.norm(h_coord - acceptor_coord)

        if 2.5 <= da_distance <= 3.5 and ha_distance <= 2.5:
            # Angle criteria (simplified)
            v1 = h_coord - donor_coord
            v2 = acceptor_coord - donor_coord

            cos_angle = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
            angle = np.arccos(np.clip(cos_angle, -1, 1)) * 180 / np.pi

            if angle > 120:  # Reasonable hydrogen bond angle
                donor_res = f"{structure.res_name[donor_heavy]}{structure.res_id[donor_heavy]}"
                acceptor_res = f"{structure.res_name[acceptor]}{structure.res_id[acceptor]}"

                potential_hbonds.append({
                    'donor': donor_res,
                    'acceptor': acceptor_res,
                    'distance': da_distance,
                    'angle': angle
                })

print(f"Found {len(potential_hbonds)} potential hydrogen bonds")
for hb in potential_hbonds[:5]:  # Show first 5
    print(f"  {hb['donor']} -> {hb['acceptor']}: "
          f"{hb['distance']:.2f} Å, {hb['angle']:.1f}°")

Performance Benchmarking#

Spatial Query Performance#

import time

# Load large structure
structure = molr.Structure.from_pdb("3j3q.cif")  # Large ribosome structure

print(f"Benchmarking with {structure.n_atoms} atoms")

# Benchmark neighbor searches
atom_idx = structure.n_atoms // 2
radii = [3.0, 5.0, 8.0, 10.0]

for radius in radii:
    start_time = time.time()
    neighbors = structure.get_neighbors_within(atom_idx, radius)
    end_time = time.time()

    print(f"Radius {radius} Å: {len(neighbors)} neighbors in {end_time - start_time:.4f}s")

# Benchmark selections
selections = [
    "protein",
    "backbone",
    "element C",
    "within 5.0 of (chain A)"
]

for selection in selections:
    start_time = time.time()
    mask = structure.select(selection)
    end_time = time.time()

    print(f"Selection '{selection}': {np.sum(mask)} atoms in {end_time - start_time:.4f}s")

Memory Usage Analysis#

import sys

# Analyze memory usage of different operations
def get_size_mb(obj):
    return sys.getsizeof(obj) / 1024 / 1024

print(f"Structure object: {get_size_mb(structure):.2f} MB")
print(f"Coordinates: {get_size_mb(structure.coord):.2f} MB")
print(f"Atom names: {get_size_mb(structure.atom_name):.2f} MB")

# Bond detection memory usage
bonds = structure.detect_bonds()
print(f"Bonds object: {get_size_mb(bonds):.2f} MB")

# Selection memory usage
protein_mask = structure.select("protein")
print(f"Selection mask: {get_size_mb(protein_mask):.2f} MB")

This comprehensive set of examples demonstrates MolR’s capabilities across different use cases. Each example can be adapted and extended for specific research needs.