Source code for smores._internal.combine

import itertools
import typing
from dataclasses import dataclass

import rdkit.Chem.AllChem as rdkit
import stk


[docs]def rdkit_from_smiles(smiles: str) -> rdkit.Mol: """ Get an embeded :mod:`rdkit` molecule from SMILES. Examples: *Create an rdkit molecule* .. doctest:: create-an-rdkit-molecule >>> import smores >>> molecule = smores.rdkit_from_smiles("Br") >>> import rdkit.Chem.AllChem as rdkit >>> rdkit.MolToSmiles(molecule) '[H]Br' Parameters: smiles: The SMILES of the molecule. Returns: The :mod:`rdkit` molecule. """ molecule = rdkit.AddHs(rdkit.MolFromSmiles(smiles)) _optimize(molecule) rdkit.Kekulize(molecule) return molecule
[docs]@dataclass class Combination: """ A combination of a core and substituent molecule. """ #: The core molecule. core: rdkit.Mol #: The substituent molecule. substituent: rdkit.Mol #: The combination of the :attr:`core` and :attr:`substituent`. product: rdkit.Mol #: The index of the dummy atom. dummy_index: int #: The index of the attached atom of the substituent. attached_index: int #: The indices of atoms in :attr:`product`, #: which are from the :attr:`core`. core_indices: list[int] #: The indices of atoms in :attr:`product`, which are #: from the :attr:`substituent`. substituent_indices: list[int]
[docs]def combine( cores: typing.Iterable[rdkit.Mol], substituents: typing.Iterable[rdkit.Mol], join_atom: typing.Literal["F", "Cl", "Br"] = "Br", optimize: bool = True, ) -> typing.Iterator[Combination]: """ Yield a set of molecules by combining `cores` with `substituents`. This function combines core and substituent molecules by creating a bond where the `join_atom` is specified. For example, if the join atom is Br, the operation is:: CCBr + BrCCN -> CCCCN The function produces every possible pairwise combination of `cores` and `substituents`. Examples: *Compare the steric parameters of various substituents* .. testcode:: compare-steric-parameters-of-substituents import smores import rdkit.Chem.AllChem as rdkit cores=[ smores.rdkit_from_smiles("NCBr"), ] substituents=[ smores.rdkit_from_smiles("CCBr"), smores.rdkit_from_smiles("CCCCBr"), ] for combo in smores.combine(cores, substituents): molecule = smores.Molecule.from_combination(combo) params = molecule.get_steric_parameters() print( f"Combination of \ {rdkit.MolToSmiles(rdkit.RemoveHs(combo.core))} and " f"{rdkit.MolToSmiles(rdkit.RemoveHs(combo.substituent))} " f"has SMORES parameters of {params}." ) .. testoutput:: compare-steric-parameters-of-substituents Combination of NCBr and CCBr has SMORES parameters of \ StericParameters(L=4.768506137873876, B1=1.7742858563716175, \ B5=3.430948246451143). Combination of NCBr and CCCCBr has SMORES parameters of \ StericParameters(L=6.713047422661813, B1=1.7856585950046786, \ B5=3.5556262756974535). Parameters: cores (list[Mol]): The core molecules. substituents (list[Mol]): The substituent molecules. join_atom: The atom in `cores` and `substituents` which specifies the location where they are joined. optimize: If ``True``, the generated molecules will have an optimized structure generated with ETKDG_. Yield: A combination of a core and substituent molecule. .. _ETKDG: https://www.rdkit.org/docs/source/\ rdkit.Chem.rdDistGeom.html#rdkit.Chem.rdDistGeom.ETKDGv3 """ for core, substituent in itertools.product(cores, substituents): core_bb = _get_building_block(core, join_atom) subsituent_bb = _get_building_block(substituent, join_atom) construct = stk.ConstructedMolecule( topology_graph=stk.polymer.Linear( building_blocks=(core_bb, subsituent_bb), repeating_unit="AB", num_repeating_units=1, ), ) steric_atoms = _get_steric_atoms(construct, core_bb) product = construct.to_rdkit_mol() if optimize: _optimize(product) core_indices = [] substituent_indices = [] for atom_info in construct.get_atom_infos(): if atom_info.get_building_block() is core_bb: core_indices.append(atom_info.get_atom().get_id()) if atom_info.get_building_block() is subsituent_bb: substituent_indices.append(atom_info.get_atom().get_id()) yield Combination( core=core, substituent=substituent, core_indices=core_indices, substituent_indices=substituent_indices, product=product, dummy_index=steric_atoms.core_atom, attached_index=steric_atoms.substituent_atom, )
def _get_building_block( molecule: rdkit.Mol, join_atom: str, ) -> stk.BuildingBlock: factories = { "F": stk.FluoroFactory(), "Br": stk.BromoFactory(), "I": stk.IodoFactory(), } rdkit.SanitizeMol(molecule) rdkit.Kekulize(molecule) return stk.BuildingBlock.init_from_rdkit_mol( molecule=molecule, functional_groups=[factories[join_atom]], ) @dataclass(frozen=True, slots=True) class _StericAtoms: core_atom: int substituent_atom: int def _get_steric_atoms( construct: stk.ConstructedMolecule, core: stk.BuildingBlock, ) -> _StericAtoms: added_bond = next( bond_info.get_bond() for bond_info in construct.get_bond_infos() if bond_info.get_building_block() is None ) added_bond_atoms = ( added_bond.get_atom1().get_id(), added_bond.get_atom2().get_id(), ) core_atom = next( atom_info.get_atom() for atom_info in construct.get_atom_infos() if atom_info.get_building_block() is core and atom_info.get_atom().get_id() in added_bond_atoms ) substituent_atom = ( added_bond.get_atom1() if core_atom.get_id() == added_bond.get_atom2().get_id() else added_bond.get_atom2() ) return _StericAtoms( core_atom=core_atom.get_id(), substituent_atom=substituent_atom.get_id(), ) def _optimize(molecule: rdkit.Mol) -> None: etkdg = rdkit.ETKDGv3() etkdg.randomSeed = 4 rdkit.EmbedMolecule(molecule, etkdg)