Source code for deepmol.utils.utils

import random

import pandas as pd
import joblib
import os
from typing import Any, cast, IO, List, Union, Tuple
import gzip
import pickle
import numpy as np

from rdkit.Chem import rdMolDescriptors, rdDepictor, Mol, RDKFingerprint, rdmolfiles, rdmolops
from rdkit.Chem import Draw
from IPython.display import SVG

from rdkit.Chem.Draw import rdMolDraw2D
from rdkit import Chem
import tempfile
from PIL import Image

from IPython.display import display

from deepmol.loggers import Logger


def smiles_to_mol(smiles: str, **kwargs) -> Union[Mol, None]:
    """
    Parse a SMILES string into an RDKit molecule object.

    Parameters
    ----------
    smiles: str
        SMILES string to parse.
    kwargs:
        Extra keyword arguments forwarded to `rdkit.Chem.MolFromSmiles`.

    Returns
    -------
    Union[Mol, None]
        The value returned by `Chem.MolFromSmiles`, or None when that call
        raises a TypeError.
    """
    try:
        molecule = Chem.MolFromSmiles(smiles, **kwargs)
    except TypeError:
        molecule = None
    return molecule
def mol_to_smiles(mol: Mol, **kwargs) -> Union[str, None]:
    """
    Serialise an RDKit molecule object to a SMILES string.

    Parameters
    ----------
    mol: Mol
        RDKit molecule object to serialise.
    kwargs:
        Extra keyword arguments forwarded to `rdkit.Chem.MolToSmiles`.

    Returns
    -------
    Union[str, None]
        The value returned by `Chem.MolToSmiles`, or None when that call
        raises a TypeError.
    """
    try:
        smiles = Chem.MolToSmiles(mol, **kwargs)
    except TypeError:
        smiles = None
    return smiles
def canonicalize_mol_object(mol_object: Mol) -> Mol:
    """
    Renumber the atoms of a molecule into RDKit's canonical order.

    Parameters
    ----------
    mol_object: Mol
        Molecule object to canonicalize.

    Returns
    -------
    Mol
        A renumbered copy of the molecule, or the input object unchanged when
        the ranking/renumbering step raises.
    """
    try:
        # ranks[i] is the canonical rank of atom i in the input molecule.
        ranks = list(rdmolfiles.CanonicalRankAtoms(mol_object))
        # RenumberAtoms wants, for every NEW position, the index of the OLD
        # atom that goes there. That is the argsort of the ranks, not the
        # ranks themselves (passing the ranks applies the inverse permutation).
        new_order = sorted(range(len(ranks)), key=ranks.__getitem__)
        return rdmolops.RenumberAtoms(mol_object, new_order)
    except Exception:
        # Deliberate best-effort: anything RDKit cannot renumber (e.g. None or
        # an empty molecule) is handed back untouched.
        return mol_object
def load_pickle_file(input_file: str) -> Any:
    """
    Load from single, possibly gzipped, pickle file.

    Parameters
    ----------
    input_file: str
        The filename of pickle file. A name ending in `.gz` (e.g.
        `XXXX.pkl.gz`) is read through gzip; anything else is read as a plain
        pickle file.

    Returns
    -------
    Any
        The object which is loaded from the pickle file.
    """
    # NOTE(security): unpickling executes arbitrary code embedded in the file;
    # only call this on files from a trusted source.
    #
    # Test the suffix rather than a substring so that a ".gz" appearing
    # elsewhere in the path (e.g. a directory called "archive.gz.d") does not
    # send a plain pickle through the gzip reader.
    if input_file.endswith(".gz"):
        with gzip.open(input_file, "rb") as unzipped_file:
            return pickle.load(cast(IO[bytes], unzipped_file))
    with open(input_file, "rb") as opened_file:
        return pickle.load(opened_file)
def load_from_disk(filename: str) -> Any:
    """
    Load object from file, dispatching on the file extension.

    A trailing `.gz` is ignored when working out the file type, so
    `data.pkl.gz`, `data.csv.gz`, `data.npy.gz`, ... are dispatched like their
    uncompressed counterparts.

    Parameters
    ----------
    filename: str
        A filename you want to load. Supported extensions are `.pkl`,
        `.joblib`, `.csv` and `.npy`.

    Returns
    -------
    Any
        A loaded object from file. CSV files are returned as a DataFrame whose
        missing values have been replaced by empty strings.

    Raises
    ------
    ValueError
        If the extension is not one of the supported ones.
    """
    stem, extension = os.path.splitext(filename)
    is_gzipped = extension == ".gz"
    if is_gzipped:
        extension = os.path.splitext(stem)[1]

    if extension == ".pkl":
        return load_pickle_file(filename)
    if extension == ".joblib":
        return joblib.load(filename)
    if extension == ".csv":
        # First line of user-specified CSV *must* be header.
        df = pd.read_csv(filename, header=0)
        df = df.replace(np.nan, str(""), regex=True)
        return df
    if extension == ".npy":
        if is_gzipped:
            # np.load cannot read a gzip-compressed file by name; hand it a
            # decompressing file object instead.
            with gzip.open(filename, "rb") as unzipped_file:
                return np.load(unzipped_file, allow_pickle=True)
        return np.load(filename, allow_pickle=True)
    raise ValueError("Unrecognized filetype for %s" % filename)
def normalize_labels_shape(y_pred: Union[List, np.ndarray], n_tasks: int) -> np.ndarray:
    """
    Flatten model predictions into one value per sample (and per task).

    Parameters
    ----------
    y_pred: array
        Array (or list) with predictions.
    n_tasks: int
        Number of tasks.

    Returns
    -------
    labels
        For a single task, the output of `_normalize_singletask_labels_shape`
        applied to `y_pred`. Otherwise, that helper is applied to every element
        along the first axis of `y_pred` and the stacked result is transposed.
    """
    if n_tasks == 1:
        return _normalize_singletask_labels_shape(y_pred)

    if isinstance(y_pred, np.ndarray) and len(y_pred.shape) == 3:
        # Keep only the entry at index 1 of the innermost axis, then transpose
        # so that iterating the first axis below walks over the second axis of
        # the original array.
        second_entries = [np.array([pair[1] for pair in row]) for row in y_pred]
        y_pred = np.array(second_entries).T

    per_task = [_normalize_singletask_labels_shape(task) for task in y_pred]
    return np.array(per_task).T
def _normalize_singletask_labels_shape(y_pred: Union[List, np.ndarray]) -> np.ndarray:
    """
    Reduce single-task predictions to a flat array with one value per sample.

    The layout is inferred from the first element of `y_pred`:

    - a scalar (`[0.1, 0.9, ...]` or `[0, 1, ...]`): returned as an array as is;
    - length-1 rows (`[[0.1], [0.2], ...]`): the single value of each row;
    - length-2 rows (`[[0.1, 0.9], ...]`): the value at index 1 of each row;
    - longer rows: the `np.argmax` of each row.

    Parameters
    ----------
    y_pred: array
        Array (or list) with predictions.

    Returns
    -------
    labels
        Flat array with one entry per element of `y_pred`. An empty input gives
        an empty array.

    Raises
    ------
    ValueError
        If the first element is a zero-length sequence.
    """
    # Nothing to inspect: avoid the IndexError that y_pred[0] would raise.
    if len(y_pred) == 0:
        return np.array(y_pred)

    first = y_pred[0]
    # Already one scalar per sample. Integers are accepted as well so that
    # hard class labels do not blow up on len() below.
    if isinstance(first, (float, int, np.floating, np.integer, np.bool_)):
        return np.array(y_pred)

    n_columns = len(first)
    # rows in the format [[0.1], [0.2], ...]
    if n_columns == 1:
        return np.array([row[0] for row in y_pred])
    # rows in the format [[0.1, 0.9], [0.2, 0.8], ...]
    if n_columns == 2:
        return np.array([row[1] for row in y_pred])
    if n_columns > 2:
        return np.array([np.argmax(row) for row in y_pred])
    raise ValueError("Unknown format for y_pred!")


# DRAWING

# Maps a MACCS key number to a (SMARTS pattern, count threshold) tuple.
# '?' marks keys that have no SMARTS definition.
# The first definition of key 2 is kept only as a comment: the dict literal
# used to contain key 2 twice, and Python silently kept the later '[#104]' one.
MACCSsmartsPatts = {
    1: ('?', 0),  # ISOTOPE
    # 2: ('[#104,#105,#106,#107,#106,#109,#110,#111,#112]', 0),  # atomic num >103 Not complete
    2: ('[#104]', 0),  # limit the above def'n since the RDKit only accepts up to #104
    3: ('[#32,#33,#34,#50,#51,#52,#82,#83,#84]', 0),  # Group IVa,Va,VIa Rows 4-6
    4: ('[Ac,Th,Pa,U,Np,Pu,Am,Cm,Bk,Cf,Es,Fm,Md,No,Lr]', 0),  # actinide
    5: ('[Sc,Ti,Y,Zr,Hf]', 0),  # Group IIIB,IVB (Sc...)
    6: ('[La,Ce,Pr,Nd,Pm,Sm,Eu,Gd,Tb,Dy,Ho,Er,Tm,Yb,Lu]', 0),  # Lanthanide
    7: ('[V,Cr,Mn,Nb,Mo,Tc,Ta,W,Re]', 0),  # Group VB,VIB,VIIB
    8: ('[!#6;!#1]1~*~*~*~1', 0),  # QAAA@1
    9: ('[Fe,Co,Ni,Ru,Rh,Pd,Os,Ir,Pt]', 0),  # Group VIII (Fe...)
    10: ('[Be,Mg,Ca,Sr,Ba,Ra]', 0),  # Group IIa (Alkaline earth)
    11: ('*1~*~*~*~1', 0),  # 4M Ring
    12: ('[Cu,Zn,Ag,Cd,Au,Hg]', 0),  # Group IB,IIB (Cu..)
    13: ('[#8]~[#7](~[#6])~[#6]', 0),  # ON(C)C
    14: ('[#16]-[#16]', 0),  # S-S
    15: ('[#8]~[#6](~[#8])~[#8]', 0),  # OC(O)O
    16: ('[!#6;!#1]1~*~*~1', 0),  # QAA@1
    17: ('[#6]#[#6]', 0),  # CTC
    18: ('[#5,#13,#31,#49,#81]', 0),  # Group IIIA (B...)
    19: ('*1~*~*~*~*~*~*~1', 0),  # 7M Ring
    20: ('[#14]', 0),  # Si
    21: ('[#6]=[#6](~[!#6;!#1])~[!#6;!#1]', 0),  # C=C(Q)Q
    22: ('*1~*~*~1', 0),  # 3M Ring
    23: ('[#7]~[#6](~[#8])~[#8]', 0),  # NC(O)O
    24: ('[#7]-[#8]', 0),  # N-O
    25: ('[#7]~[#6](~[#7])~[#7]', 0),  # NC(N)N
    26: ('[#6]=;@[#6](@*)@*', 0),  # C$=C($A)$A
    27: ('[I]', 0),  # I
    28: ('[!#6;!#1]~[CH2]~[!#6;!#1]', 0),  # QCH2Q
    29: ('[#15]', 0),  # P
    30: ('[#6]~[!#6;!#1](~[#6])(~[#6])~*', 0),  # CQ(C)(C)A
    31: ('[!#6;!#1]~[F,Cl,Br,I]', 0),  # QX
    32: ('[#6]~[#16]~[#7]', 0),  # CSN
    33: ('[#7]~[#16]', 0),  # NS
    34: ('[CH2]=*', 0),  # CH2=A
    35: ('[Li,Na,K,Rb,Cs,Fr]', 0),  # Group IA (Alkali Metal)
    36: ('[#16R]', 0),  # S Heterocycle
    37: ('[#7]~[#6](~[#8])~[#7]', 0),  # NC(O)N
    38: ('[#7]~[#6](~[#6])~[#7]', 0),  # NC(C)N
    39: ('[#8]~[#16](~[#8])~[#8]', 0),  # OS(O)O
    40: ('[#16]-[#8]', 0),  # S-O
    41: ('[#6]#[#7]', 0),  # CTN
    42: ('F', 0),  # F
    43: ('[!#6;!#1;!H0]~*~[!#6;!#1;!H0]', 0),  # QHAQH
    44: ('[!#1;!#6;!#7;!#8;!#9;!#14;!#15;!#16;!#17;!#35;!#53]', 0),  # OTHER
    45: ('[#6]=[#6]~[#7]', 0),  # C=CN
    46: ('Br', 0),  # BR
    47: ('[#16]~*~[#7]', 0),  # SAN
    48: ('[#8]~[!#6;!#1](~[#8])(~[#8])', 0),  # OQ(O)O
    49: ('[!+0]', 0),  # CHARGE
    50: ('[#6]=[#6](~[#6])~[#6]', 0),  # C=C(C)C
    51: ('[#6]~[#16]~[#8]', 0),  # CSO
    52: ('[#7]~[#7]', 0),  # NN
    53: ('[!#6;!#1;!H0]~*~*~*~[!#6;!#1;!H0]', 0),  # QHAAAQH
    54: ('[!#6;!#1;!H0]~*~*~[!#6;!#1;!H0]', 0),  # QHAAQH
    55: ('[#8]~[#16]~[#8]', 0),  # OSO
    56: ('[#8]~[#7](~[#8])~[#6]', 0),  # ON(O)C
    57: ('[#8R]', 0),  # O Heterocycle
    58: ('[!#6;!#1]~[#16]~[!#6;!#1]', 0),  # QSQ
    59: ('[#16]!:*:*', 0),  # Snot%A%A
    60: ('[#16]=[#8]', 0),  # S=O
    61: ('*~[#16](~*)~*', 0),  # AS(A)A
    62: ('*@*!@*@*', 0),  # A$!A$A
    63: ('[#7]=[#8]', 0),  # N=O
    64: ('*@*!@[#16]', 0),  # A$A!S
    65: ('c:n', 0),  # C%N
    66: ('[#6]~[#6](~[#6])(~[#6])~*', 0),  # CC(C)(C)A
    67: ('[!#6;!#1]~[#16]', 0),  # QS
    68: ('[!#6;!#1;!H0]~[!#6;!#1;!H0]', 0),  # QHQH (&...) SPEC Incomplete
    69: ('[!#6;!#1]~[!#6;!#1;!H0]', 0),  # QQH
    70: ('[!#6;!#1]~[#7]~[!#6;!#1]', 0),  # QNQ
    71: ('[#7]~[#8]', 0),  # NO
    72: ('[#8]~*~*~[#8]', 0),  # OAAO
    73: ('[#16]=*', 0),  # S=A
    74: ('[CH3]~*~[CH3]', 0),  # CH3ACH3
    75: ('*!@[#7]@*', 0),  # A!N$A
    76: ('[#6]=[#6](~*)~*', 0),  # C=C(A)A
    77: ('[#7]~*~[#7]', 0),  # NAN
    78: ('[#6]=[#7]', 0),  # C=N
    79: ('[#7]~*~*~[#7]', 0),  # NAAN
    80: ('[#7]~*~*~*~[#7]', 0),  # NAAAN
    81: ('[#16]~*(~*)~*', 0),  # SA(A)A
    82: ('*~[CH2]~[!#6;!#1;!H0]', 0),  # ACH2QH
    83: ('[!#6;!#1]1~*~*~*~*~1', 0),  # QAAAA@1
    84: ('[NH2]', 0),  # NH2
    85: ('[#6]~[#7](~[#6])~[#6]', 0),  # CN(C)C
    86: ('[C;H2,H3][!#6;!#1][C;H2,H3]', 0),  # CH2QCH2
    87: ('[F,Cl,Br,I]!@*@*', 0),  # X!A$A
    88: ('[#16]', 0),  # S
    89: ('[#8]~*~*~*~[#8]', 0),  # OAAAO
    90: ('[$([!#6;!#1;!H0]~*~*~[CH2]~*),$([!#6;!#1;!H0;R]1@[R]@[R]@[CH2;R]1),$([!#6;!#1;!H0]~[R]1@[R]@[CH2;R]1)]', 0),  # QHAACH2A
    91: (
        '[$([!#6;!#1;!H0]~*~*~*~[CH2]~*),$([!#6;!#1;!H0;R]1@[R]@[R]@[R]@[CH2;R]1),$([!#6;!#1;!H0]~[R]1@[R]@[R]@['
        'CH2;R]1),$([!#6;!#1;!H0]~*~[R]1@[R]@[CH2;R]1)]',
        0),  # QHAAACH2A
    92: ('[#8]~[#6](~[#7])~[#6]', 0),  # OC(N)C
    93: ('[!#6;!#1]~[CH3]', 0),  # QCH3
    94: ('[!#6;!#1]~[#7]', 0),  # QN
    95: ('[#7]~*~*~[#8]', 0),  # NAAO
    96: ('*1~*~*~*~*~1', 0),  # 5 M ring
    97: ('[#7]~*~*~*~[#8]', 0),  # NAAAO
    98: ('[!#6;!#1]1~*~*~*~*~*~1', 0),  # QAAAAA@1
    99: ('[#6]=[#6]', 0),  # C=C
    100: ('*~[CH2]~[#7]', 0),  # ACH2N
    101: (
        '[$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@['
        'R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@['
        'R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1),$([R]@1@[R]@[R]@[R]@['
        'R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]@[R]1)]',
        0),  # 8M Ring or larger. This only handles up to ring sizes of 14
    102: ('[!#6;!#1]~[#8]', 0),  # QO
    103: ('Cl', 0),  # CL
    104: ('[!#6;!#1;!H0]~*~[CH2]~*', 0),  # QHACH2A
    105: ('*@*(@*)@*', 0),  # A$A($A)$A
    106: ('[!#6;!#1]~*(~[!#6;!#1])~[!#6;!#1]', 0),  # QA(Q)Q
    107: ('[F,Cl,Br,I]~*(~*)~*', 0),  # XA(A)A
    108: ('[CH3]~*~*~*~[CH2]~*', 0),  # CH3AAACH2A
    109: ('*~[CH2]~[#8]', 0),  # ACH2O
    110: ('[#7]~[#6]~[#8]', 0),  # NCO
    111: ('[#7]~*~[CH2]~*', 0),  # NACH2A
    112: ('*~*(~*)(~*)~*', 0),  # AA(A)(A)A
    113: ('[#8]!:*:*', 0),  # Onot%A%A
    114: ('[CH3]~[CH2]~*', 0),  # CH3CH2A
    115: ('[CH3]~*~[CH2]~*', 0),  # CH3ACH2A
    116: ('[$([CH3]~*~*~[CH2]~*),$([CH3]~*1~*~[CH2]1)]', 0),  # CH3AACH2A
    117: ('[#7]~*~[#8]', 0),  # NAO
    118: ('[$(*~[CH2]~[CH2]~*),$(*1~[CH2]~[CH2]1)]', 1),  # ACH2CH2A > 1
    119: ('[#7]=*', 0),  # N=A
    120: ('[!#6;R]', 1),  # Heterocyclic atom > 1 (&...) Spec Incomplete
    121: ('[#7;R]', 0),  # N Heterocycle
    122: ('*~[#7](~*)~*', 0),  # AN(A)A
    123: ('[#8]~[#6]~[#8]', 0),  # OCO
    124: ('[!#6;!#1]~[!#6;!#1]', 0),  # QQ
    125: ('?', 0),  # Aromatic Ring > 1
    126: ('*!@[#8]!@*', 0),  # A!O!A
    127: ('*@*!@[#8]', 1),  # A$A!O > 1 (&...) Spec Incomplete
    128: (
        '[$(*~[CH2]~*~*~*~[CH2]~*),$([R]1@[CH2;R]@[R]@[R]@[R]@[CH2;R]1),$(*~[CH2]~[R]1@[R]@[R]@[CH2;R]1),'
        '$(*~[CH2]~*~[R]1@[R]@[CH2;R]1)]',
        0),  # ACH2AAACH2A
    129: ('[$(*~[CH2]~*~*~[CH2]~*),$([R]1@[CH2]@[R]@[R]@[CH2;R]1),$(*~[CH2]~[R]1@[R]@[CH2;R]1)]', 0),  # ACH2AACH2A
    130: ('[!#6;!#1]~[!#6;!#1]', 1),  # QQ > 1 (&...) Spec Incomplete
    131: ('[!#6;!#1;!H0]', 1),  # QH > 1
    132: ('[#8]~*~[CH2]~*', 0),  # OACH2A
    133: ('*@*!@[#7]', 0),  # A$A!N
    134: ('[F,Cl,Br,I]', 0),  # X (HALOGEN)
    135: ('[#7]!:*:*', 0),  # Nnot%A%A
    136: ('[#8]=*', 1),  # O=A>1
    137: ('[!C;!c;R]', 0),  # Heterocycle
    138: ('[!#6;!#1]~[CH2]~*', 1),  # QCH2A>1 (&...) Spec Incomplete
    139: ('[O;!H0]', 0),  # OH
    140: ('[#8]', 3),  # O > 3 (&...) Spec Incomplete
    141: ('[CH3]', 2),  # CH3 > 2 (&...) Spec Incomplete
    142: ('[#7]', 1),  # N > 1
    143: ('*@*!@[#8]', 0),  # A$A!O
    144: ('*!:*:*!:*', 0),  # Anot%A%Anot%A
    145: ('*1~*~*~*~*~*~1', 1),  # 6M ring > 1
    146: ('[#8]', 2),  # O > 2
    147: ('[$(*~[CH2]~[CH2]~*),$([R]1@[CH2;R]@[CH2;R]1)]', 0),  # ACH2CH2A
    148: ('*~[!#6;!#1](~*)~*', 0),  # AQ(A)A
    149: ('[C;H3,H4]', 1),  # CH3 > 1
    150: ('*!@*@*!@*', 0),  # A!A$A!A
    151: ('[#7;!H0]', 0),  # NH
    152: ('[#8]~[#6](~[#6])~[#6]', 0),  # OC(C)C
    153: ('[!#6;!#1]~[CH2]~*', 0),  # QCH2A
    154: ('[#6]=[#8]', 0),  # C=O
    155: ('*!@[CH2]!@*', 0),  # A!CH2!A
    156: ('[#7]~*(~*)~*', 0),  # NA(A)A
    157: ('[#6]-[#8]', 0),  # C-O
    158: ('[#6]-[#7]', 0),  # C-N
    159: ('[#8]', 1),  # O>1
    160: ('[C;H3,H4]', 0),  # CH3
    161: ('[#7]', 0),  # N
    162: ('a', 0),  # Aromatic
    163: ('*1~*~*~*~*~*~1', 0),  # 6M Ring
    164: ('[#8]', 0),  # O
    165: ('[R]', 0),  # Ring
    166: ('?', 0),  # Fragments FIX: this can't be done in SMARTS
}


###############################
######### MACCS KEYS #########
###############################
def draw_MACCS_Pattern(mol: Mol, smarts_patt_index: int, path: str = None):
    """
    Draw a molecule with a MACCS key highlighted.

    Every match of the key's SMARTS pattern is highlighted (atoms and bonds)
    in its own randomly chosen colour.

    Parameters
    ----------
    mol: Mol
        Molecule to draw.
    smarts_patt_index: int
        Index of the MACCS key to highlight (a key of `MACCSsmartsPatts`).
    path: str
        Path to save the image to. If None, the image is rendered through a
        temporary file that is removed before returning.

    Returns
    -------
    im: PIL.Image.Image
        Image of the molecule with the MACCS key highlighted, or None (after
        logging a message) when the key has no usable SMARTS pattern or the
        pattern does not match the molecule.
    """
    logger = Logger()
    smarts = MACCSsmartsPatts[smarts_patt_index][0]
    patt = Chem.MolFromSmarts(smarts)
    if patt is None:
        # Placeholder entries ('?') are not valid SMARTS, so there is nothing
        # to search for; bail out instead of passing None to RDKit.
        logger.info('MACCS key %d has no SMARTS pattern that can be drawn!' % smarts_patt_index)
        return None

    hit_ats = mol.GetSubstructMatches(patt)
    if not hit_ats:
        logger.info('Pattern does not match molecule!')
        return None

    atom_cols = {}
    bond_cols = {}
    atom_list = []
    bond_list = []
    # One colour per match, drawn up front so the colour of match i does not
    # depend on how many atoms/bonds earlier matches had.
    colours = [(random.random(), random.random(), random.random()) for _ in hit_ats]
    for colour, hit_at in zip(colours, hit_ats):
        hit_at = list(hit_at)
        for at in hit_at:
            atom_cols[at] = colour
            atom_list.append(at)
        # Map every bond of the pattern onto the matching bond of the molecule.
        for bond in patt.GetBonds():
            a1 = hit_at[bond.GetBeginAtomIdx()]
            a2 = hit_at[bond.GetEndAtomIdx()]
            bd = mol.GetBondBetweenAtoms(a1, a2).GetIdx()
            bond_cols[bd] = colour
            bond_list.append(bd)

    d = rdMolDraw2D.MolDraw2DCairo(500, 500)
    rdMolDraw2D.PrepareAndDrawMolecule(d, mol, highlightAtoms=atom_list, highlightAtomColors=atom_cols,
                                       highlightBonds=bond_list, highlightBondColors=bond_cols)
    d.FinishDrawing()

    if path is None:
        with tempfile.TemporaryDirectory() as tmpdirname:
            # Join properly so the file lands INSIDE the temporary directory
            # and is cleaned up with it.
            png_path = os.path.join(tmpdirname, 'mol.png')
            d.WriteDrawingText(png_path)
            im = Image.open(png_path)
            # Image.open is lazy: read the pixels now, before the file vanishes.
            im.load()
        return im

    d.WriteDrawingText(path)
    return Image.open(path)
############################### ##### MORGAN FINGERPRINTS ##### ###############################
def draw_morgan_bits(molecule: Mol, bits: Union[int, str, List[int]], radius: int = 2, nBits: int = 2048):
    """
    Draw the atom environments behind Morgan fingerprint bits of a molecule.

    Parameters
    ----------
    molecule: Mol
        Molecule to draw.
    bits: Union[int, str, List[int]]
        Bit(s) to draw. An int draws that single bit, a list draws every
        listed bit that is on, and 'ON' draws all bits that are set to 1.
    radius: int
        Radius of the Morgan fingerprint.
    nBits: int
        Number of bits in the Morgan fingerprint.

    Returns
    -------
    DrawMorganBits
        The drawing returned by `Draw.DrawMorganBit` (single bit) or
        `Draw.DrawMorganBits` (several bits).

    Raises
    ------
    ValueError
        If the requested bit is off, if every bit in the list is off, or if
        `bits` is not an int, a list or 'ON'.
    """
    bi = {}
    mol = molecule
    fp = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, radius=radius, nBits=nBits, bitInfo=bi)
    logger = Logger()

    if isinstance(bits, int):
        if bits not in bi:
            logger.info(f'Bits ON: {bi.keys()}')
            raise ValueError('Bit is off! Select a on bit')
        return Draw.DrawMorganBit(mol, bits, bi)

    if isinstance(bits, list):
        bits_on = []
        for b in bits:
            if b in bi:
                bits_on.append(b)
            else:
                logger.info('Bit %d is off!' % (b))
        if not bits_on:
            raise ValueError('All the selected bits are off! Select on bits!')
        if len(bits_on) != len(bits):
            # Build the whole message first: the logger is given exactly one
            # argument, as in the equivalent branch of draw_rdk_bits.
            logger.info(f'Using only bits ON: {bits_on}')
        tpls = [(mol, x, bi) for x in bits_on]
        return Draw.DrawMorganBits(tpls, molsPerRow=5, legends=['bit_' + str(x) for x in bits_on])

    if bits == 'ON':
        on_bits = list(fp.GetOnBits())
        tpls = [(mol, x, bi) for x in on_bits]
        return Draw.DrawMorganBits(tpls, molsPerRow=5, legends=[str(x) for x in on_bits])

    raise ValueError('Bits must be intenger, list of integers or ON!')
def prepareMol(mol: Mol, kekulize: bool):
    """
    Prepare a molecule for drawing.

    Works on a copy of the input (rebuilt from its binary form), optionally
    kekulizes it, and computes 2D coordinates when the copy has no conformer.

    Parameters
    ----------
    mol: Mol
        Molecule to prepare.
    kekulize: bool
        If True, the molecule is kekulized. When kekulization fails, a fresh
        non-kekulized copy is used instead.

    Returns
    -------
    mc: Mol
        Prepared molecule.
    """
    mc = Chem.Mol(mol.ToBinary())
    if kekulize:
        try:
            Chem.Kekulize(mc)
        except Exception:
            # Kekulization is best-effort: start over from a clean copy.
            # `Exception` rather than a bare except, so KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            mc = Chem.Mol(mol.ToBinary())
    if not mc.GetNumConformers():
        rdDepictor.Compute2DCoords(mc)
    return mc
def moltosvg(mol: Mol, molSize: Tuple[int, int] = (450, 200), kekulize: bool = True, drawer: object = None,
             **kwargs):
    """
    Render a molecule as an SVG image.

    Parameters
    ----------
    mol: Mol
        Molecule to render.
    molSize: Tuple[int, int]
        (width, height) of the canvas; only used when no drawer is supplied.
    kekulize: bool
        Passed on to `prepareMol`.
    drawer: object
        Drawer to render with. If None, an `rdMolDraw2D.MolDraw2DSVG` of size
        `molSize` is created.
    **kwargs:
        Additional arguments forwarded to the drawer's `DrawMolecule`.

    Returns
    -------
    SVG
        The drawing text with every 'svg:' occurrence removed, wrapped in an
        IPython `SVG` object.
    """
    prepared = prepareMol(mol, kekulize)
    if drawer is None:
        width = molSize[0]
        height = molSize[1]
        drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
    drawer.DrawMolecule(prepared, **kwargs)
    drawer.FinishDrawing()
    svg_text = drawer.GetDrawingText()
    cleaned = svg_text.replace('svg:', '')
    return SVG(cleaned)
def getSubstructDepiction(mol: Mol, atomID: int, radius: int, molSize: Tuple[int, int] = (450, 200)):
    """
    Depict a molecule with the environment around one atom highlighted.

    Parameters
    ----------
    mol: Mol
        Molecule to draw.
    atomID: int
        Index of the central atom; it is given its own highlight colour.
    radius: int
        Radius of the environment. For a radius of 0 (or less) only the
        central atom is highlighted.
    molSize: Tuple[int, int]
        Size of the drawing.

    Returns
    -------
    SVG
        The molecule in SVG format, as produced by `moltosvg`.
    """
    if radius <= 0:
        highlighted = [atomID]
    else:
        # Collect both end atoms of every bond in the environment.
        atom_ids = set()
        for bond_idx in Chem.FindAtomEnvironmentOfRadiusN(mol, radius, atomID):
            atom_ids.add(mol.GetBondWithIdx(bond_idx).GetBeginAtomIdx())
            atom_ids.add(mol.GetBondWithIdx(bond_idx).GetEndAtomIdx())
        highlighted = list(atom_ids)
    centre_colour = {atomID: (0.3, 0.3, 1)}
    return moltosvg(mol, molSize=molSize, highlightAtoms=highlighted, highlightAtomColors=centre_colour)
def draw_morgan_bit_on_molecule(mol: Mol, bit: int, radius: int = 2, nBits: int = 2048, chiral: bool = False,
                                molSize: Tuple[int, int] = (450, 200)):
    """
    Draw a molecule with a Morgan fingerprint bit highlighted.

    Only the first atom environment recorded for the bit is drawn; the number
    of environments found is logged.

    Parameters
    ----------
    mol: Mol
        Molecule to draw.
    bit: int
        Bit to highlight.
    radius: int
        Radius of the Morgan fingerprint.
    nBits: int
        Number of bits in the Morgan fingerprint.
    chiral: bool
        Passed as `useChirality` when computing the fingerprint.
    molSize: Tuple[int, int]
        Size of the molecule.

    Returns
    -------
    SVG
        The molecule in SVG format.

    Raises
    ------
    ValueError
        If the requested bit is off for this molecule.
    """
    info = {}
    rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, radius=radius, nBits=nBits, bitInfo=info,
                                                   useChirality=chiral)
    logger = Logger()
    if bit not in info:
        # Build the whole message first: the logger is given exactly one
        # argument, as in the other drawing helpers of this module.
        logger.info(f'Bits ON: {info.keys()}')
        raise ValueError('Bit is off! Select a on bit')
    logger.info('Bit %d with %d hits!' % (bit, len(info[bit])))
    # Each hit is an (atom index, radius) pair; depict the first one.
    aid, rad = info[bit][0]
    return getSubstructDepiction(mol, aid, rad, molSize=molSize)
############################### ##### RDK FINGERPRINTS ##### ###############################
def draw_rdk_bits(mol: Mol, bits: Union[int, str, List[int]], minPath: int = 2, maxPath: int = 7,
                  fpSize: int = 2048):
    """
    Draw the paths behind RDK fingerprint bits of a molecule.

    Parameters
    ----------
    mol: Mol
        Molecule to draw.
    bits: Union[int, str, List[int]]
        Bit(s) to draw. An int draws that single bit, a list draws every
        listed bit that is on, and 'ON' draws all bits that are set to 1.
    minPath: int
        Minimum path length.
    maxPath: int
        Maximum path length.
    fpSize: int
        Number of bits in the fingerprint.

    Returns
    -------
    Draw.DrawRDKitBits
        The drawing returned by `Draw.DrawRDKitBit` (single bit) or
        `Draw.DrawRDKitBits` (several bits).

    Raises
    ------
    ValueError
        If the requested bit is off, if every bit in the list is off, or if
        `bits` is not an int, a list or 'ON'.
    """
    rdkbit = {}
    fp = RDKFingerprint(mol, minPath=minPath, maxPath=maxPath, fpSize=fpSize, bitInfo=rdkbit)
    logger = Logger()

    if isinstance(bits, int):
        if bits not in rdkbit:
            logger.info(f'Bits ON: {rdkbit.keys()}')
            raise ValueError('Bit is off! Select a on bit')
        return Draw.DrawRDKitBit(mol, bits, rdkbit)

    if isinstance(bits, list):
        bits_on = []
        for b in bits:
            if b in rdkbit:
                bits_on.append(b)
            else:
                logger.info('Bit %d is off!' % (b))
        if not bits_on:
            raise ValueError('All the selected bits are off! Select on bits!')
        if len(bits_on) != len(bits):
            logger.info(f'Bits ON: {bits_on}')
        tpls = [(mol, x, rdkbit) for x in bits_on]
        return Draw.DrawRDKitBits(tpls, molsPerRow=5, legends=['bit_' + str(x) for x in bits_on])

    if bits == 'ON':
        # Materialise once so the tuples and the legends share the same list.
        on_bits = list(fp.GetOnBits())
        tpls = [(mol, x, rdkbit) for x in on_bits]
        return Draw.DrawRDKitBits(tpls, molsPerRow=5, legends=[str(x) for x in on_bits])

    raise ValueError('Bits must be intenger, list of integers or ON!')
def draw_rdk_bit_on_molecule(mol: Mol, bit: int, minPath: int = 1, maxPath: int = 7, fpSize: int = 2048,
                             path_dir: str = None, molSize: Tuple[int, int] = (450, 200)):
    """
    Draw a molecule with a RDK fingerprint bit highlighted.

    One image is produced per path recorded for the bit, with the bonds of
    that path highlighted.

    Parameters
    ----------
    mol: Mol
        Molecule to draw.
    bit: int
        Bit to highlight.
    minPath: int
        Minimum path length.
    maxPath: int
        Maximum path length.
    fpSize: int
        Number of bits in the fingerprint.
    path_dir: str
        Prefix for the saved images: image i is written to
        `path_dir + 'mol_<i>.png'`, so a directory must be given with its
        trailing separator. If None, images are rendered through temporary
        files that are removed again.
    molSize: Tuple[int, int]
        Size of the molecule.

    Returns
    -------
    Images
        The return value of `IPython.display.display` called on the images.

    Raises
    ------
    ValueError
        If the requested bit is off for this molecule.
    """
    logger = Logger()
    info = {}
    RDKFingerprint(mol, minPath=minPath, maxPath=maxPath, fpSize=fpSize, bitInfo=info)
    if bit not in info:
        logger.info(f'Bits ON: {info.keys()}')
        raise ValueError('Bit is off! Select a on bit')

    hits = info[bit]
    logger.info('Bit %d with %d hits!' % (bit, len(hits)))

    images = []
    for i, bond_path in enumerate(hits):
        d = rdMolDraw2D.MolDraw2DCairo(molSize[0], molSize[1])
        rdMolDraw2D.PrepareAndDrawMolecule(d, mol, highlightBonds=bond_path)
        d.FinishDrawing()
        file_name = 'mol_' + str(i) + '.png'
        if path_dir is None:
            with tempfile.TemporaryDirectory() as tmpdirname:
                # Join properly so the file lands INSIDE the temporary
                # directory and is cleaned up with it.
                png_path = os.path.join(tmpdirname, file_name)
                d.WriteDrawingText(png_path)
                im = Image.open(png_path)
                # Image.open is lazy: read the pixels now, before the file
                # vanishes together with the directory.
                im.load()
        else:
            # Plain concatenation is kept on purpose for backward
            # compatibility with callers that pass "some/dir/" or a prefix.
            png_path = path_dir + file_name
            d.WriteDrawingText(png_path)
            im = Image.open(png_path)
        images.append(im)
    return display(*images)