Source code for smores._internal.molecule

import pathlib
import typing

import morfeus
import numpy as np
import numpy.typing as npt
import rdkit.Chem.AllChem as rdkit

from smores._internal.combine import Combination
from smores._internal.constants import streusel_radii
from smores._internal.steric_parameters import StericParameters


[docs]class Molecule: """ Calculates :class:`.StericParameters` from STREUSEL radii. See also: * :class:`.EspMolecule`: For calculating steric parameters from electrostatic potentials. Examples: *Calculate steric parameters* .. doctest:: molecule-calculate-steric-parameters >>> import smores >>> molecule = smores.Molecule.from_smiles("Cl", \ dummy_index=0, attached_index=1) >>> molecule.get_steric_parameters() StericParameters(L=3.57164113574581, \ B1=1.9730970556668774, B5=2.320611610648539) .. tip:: There are many different way to initialize a :class:`.Molecule`, for example :meth:`from_mol_file` or :meth:`from_xyz_file`. See the list of methods below. *Use custom atomic radii* If we want to use custom atomic radii, we can simply make use of the `radii` parameter, where we provide the desired radius of each atom .. doctest:: custom-atomic-radii >>> import smores >>> molecule = smores.Molecule.from_smiles("Cl", \ dummy_index=0, attached_index=1, radii=[1.75, 1.2]) >>> molecule.get_steric_parameters() StericParameters(L=3.57164113574581, \ B1=1.9730970556668774, B5=2.320611610648539) """ #: The atoms of the molecule. _atoms: tuple[str, ...] #: The N x 3 position matrix of the molecule. _positions: npt.NDArray[np.float32] def __init__( self, atoms: typing.Iterable[str], positions: npt.ArrayLike, dummy_index: int, attached_index: int, excluded_indices: typing.Iterable[int] | None = None, radii: npt.ArrayLike | None = None, ) -> None: """ Initialize a :class:`.Molecule`. Parameters: atoms (list[str]): The elemental symbol of each atom of the molecule. positions (list[list[float]]): The coordinates of each atom of the molecule, provided as an N x 3 matrix. dummy_index: The index of the dummy atom. attached_index: The index of the attached atom of the substituent. excluded_indices (list[int]): The indices of atoms which are not included in the parameter calculation. radii (list[float]): The radius of each atom of the molecule. If ``None`` the STREUSEL_ radii will be used. .. _STREUSEL: https://streusel.readthedocs.io """ if radii is None: radii = np.array([streusel_radii[atom] for atom in atoms]) else: radii = np.array(radii) self._atoms = tuple(atoms) self._positions = np.array(positions) self._radii = radii self._morfeus_dummy_index = dummy_index + 1 self._morfeus_attached_index = attached_index + 1 self._morfeus_excluded_indices = ( None if excluded_indices is None else tuple(index + 1 for index in excluded_indices) )
[docs] def get_dummy_index(self) -> int: """ Get the index of the dummy atom. Returns: The index of the dummy atom. """ return self._morfeus_dummy_index - 1
[docs] def get_attached_index(self) -> int: """ Get the index of the atom attached to the substituent. Returns: The index of the atom attached to the substituent. """ return self._morfeus_attached_index - 1
[docs] def get_excluded_indices(self) -> typing.Iterable[int]: """ Yield indices of atoms excluded from the parameter calculation. Yields: The index of an excluded atom. """ if self._morfeus_excluded_indices is None: return for index in self._morfeus_excluded_indices: yield index - 1
[docs] @classmethod def from_combination( cls, combination: Combination, radii: npt.ArrayLike | None = None, ) -> "Molecule": """ Get a molecule from a :class:`.Combination`. The molecule will automatically use the dummy and attached index defined in `combination` and set all the :attr:`~.Combination.core_indices` as excluded indices. Parameters: combination: A combination of a core and substituent molecule. radii (list[float]): The radius of each atom of the molecule. If ``None`` the STREUSEL_ radii will be used. Returns: The molecule. """ return cls.from_rdkit( molecule=combination.product, dummy_index=combination.dummy_index, attached_index=combination.attached_index, excluded_indices=combination.core_indices, radii=radii, )
[docs] @classmethod def from_xyz_file( cls, path: pathlib.Path | str, dummy_index: int, attached_index: int, excluded_indices: typing.Iterable[int] | None = None, radii: npt.ArrayLike | None = None, ) -> "Molecule": """ Get a molecule from a ``.xyz`` file. Parameters: path: The path to the file. dummy_index: The index of the dummy atom. attached_index: The index of the attached atom of the substituent. excluded_indices (list[int]): The indices of atoms which are not included in the parameter calculation. radii (list[float]): The radius of each atom of the molecule. If ``None`` the STREUSEL_ radii will be used. Returns: The molecule. """ instance = cls.__new__(cls) molecule = rdkit.MolFromXYZFile(str(path)) instance._atoms = tuple( atom.GetSymbol() for atom in molecule.GetAtoms() ) instance._positions = molecule.GetConformer(0).GetPositions() if radii is None: instance._radii = np.array( [streusel_radii[atom] for atom in instance._atoms] ) else: instance._radii = np.array(radii) instance._morfeus_dummy_index = dummy_index + 1 instance._morfeus_attached_index = attached_index + 1 instance._morfeus_excluded_indices = ( None if excluded_indices is None else tuple(index + 1 for index in excluded_indices) ) return instance
[docs] @classmethod def from_mol_file( cls, path: pathlib.Path | str, dummy_index: int, attached_index: int, excluded_indices: typing.Iterable[int] | None = None, radii: npt.ArrayLike | None = None, ) -> "Molecule": """ Get a molecule from a ``.mol`` file. Parameters: path: The path to the file. dummy_index: The index of the dummy atom. attached_index: The index of the attached atom of the substituent. excluded_indices (list[int]): The indices of atoms which are not included in the parameter calculation. radii (list[float]): The radius of each atom of the molecule. If ``None`` the STREUSEL_ radii will be used. Returns: The molecule. """ instance = cls.__new__(cls) molecule = rdkit.MolFromMolFile(str(path), removeHs=False) instance._atoms = tuple( atom.GetSymbol() for atom in molecule.GetAtoms() ) instance._positions = molecule.GetConformer(0).GetPositions() if radii is None: instance._radii = np.array( [streusel_radii[atom] for atom in instance._atoms] ) else: instance._radii = np.array(radii) instance._morfeus_dummy_index = dummy_index + 1 instance._morfeus_attached_index = attached_index + 1 instance._morfeus_excluded_indices = ( None if excluded_indices is None else tuple(index + 1 for index in excluded_indices) ) return instance
[docs] @classmethod def from_smiles( cls, smiles: str, dummy_index: int, attached_index: int, excluded_indices: typing.Iterable[int] | None = None, positions: npt.ArrayLike | None = None, radii: npt.ArrayLike | None = None, ) -> "Molecule": """ Get a molecule from a SMILES string. Parameters: smiles: The SMILES of the molecule. dummy_index: The index of the dummy atom. attached_index: The index of the attached atom of the substituent. excluded_indices (list[int]): The indices of atoms which are not included in the parameter calculation. positions (list[list[float]]): The coordinates of each atom of the molecule provided as an N x 3 matrix. If ``None`` then the molecule will have its coordinates calculated with ETKDG_. radii (list[float]): The radius of each atom of the molecule. If ``None`` the STREUSEL_ radii will be used. Returns: The molecule. .. _ETKDG: https://www.rdkit.org/docs/source/\ rdkit.Chem.rdDistGeom.html#rdkit.Chem.rdDistGeom.ETKDGv3 """ instance = cls.__new__(cls) molecule = rdkit.AddHs(rdkit.MolFromSmiles(smiles)) instance._atoms = tuple( atom.GetSymbol() for atom in molecule.GetAtoms() ) if positions is None: params = rdkit.ETKDGv3() params.randomSeed = 4 rdkit.EmbedMolecule(molecule, params) instance._positions = molecule.GetConformer(0).GetPositions() else: instance._positions = np.array(positions) if radii is None: instance._radii = np.array( [streusel_radii[atom] for atom in instance._atoms] ) else: instance._radii = np.array(radii) instance._morfeus_dummy_index = dummy_index + 1 instance._morfeus_attached_index = attached_index + 1 instance._morfeus_excluded_indices = ( None if excluded_indices is None else tuple(index + 1 for index in excluded_indices) ) return instance
[docs] @classmethod def from_rdkit( cls, molecule: rdkit.Mol, dummy_index: int, attached_index: int, excluded_indices: typing.Iterable[int] | None = None, radii: npt.ArrayLike | None = None, conformer_id: int = 0, ) -> "Molecule": """ Get a molecule from an :mod:`rdkit` molecule. Parameters: molecule: The :mod:`rdkit` molecule. It must have at least one conformer. dummy_index: The index of the dummy atom. attached_index: The index of the attached atom of the substituent. excluded_indices (list[int]): The indices of atoms which are not included in the parameter calculation. radii (list[float]): The radius of each atom of the molecule. If ``None`` the STREUSEL_ radii will be used. conformer_id: The id of the conformer to use. Returns: The :mod:`smores` molecule. """ instance = cls.__new__(cls) instance._atoms = tuple( atom.GetSymbol() for atom in molecule.GetAtoms() ) instance._positions = molecule.GetConformer( conformer_id ).GetPositions() if radii is None: instance._radii = np.array( [streusel_radii[atom] for atom in instance._atoms] ) else: instance._radii = np.array(radii) instance._morfeus_dummy_index = dummy_index + 1 instance._morfeus_attached_index = attached_index + 1 instance._morfeus_excluded_indices = ( None if excluded_indices is None else tuple(index + 1 for index in excluded_indices) ) return instance
[docs] def get_steric_parameters(self) -> StericParameters: """ Get the steric paramters from STREUSEL_ radii. Returns: The parameters. .. _STREUSEL: https://streusel.readthedocs.io """ sterimol = morfeus.Sterimol( elements=self._atoms, coordinates=self._positions, dummy_index=self._morfeus_dummy_index, attached_index=self._morfeus_attached_index, radii=self._radii, excluded_atoms=self._morfeus_excluded_indices, ) return StericParameters( L=sterimol.L_value, B1=sterimol.B_1_value, B5=sterimol.B_5_value, )