Highlighting atom features with RDKit

Gist here

The SMARTS codes, found online, to define hydrogen bond acceptors and donors can sometimes lead to odd results when considering protein-ligand interactions. That is, they might not actually accept or donate hydrogen bonds in practice. Similarly, HBDs can have more (or fewer) hydrogens, depending on which model of pKa is used. Figuring all this out requires, in turn, knowledge of the conjugation state of an atom to assess the behaviour of lone pairs. For examples/refs, see here and here

To help with my own models of atom typing, I wrote a visualization function to highlight HBD/HBAs, conjugated bonds, and aromatic atoms in RDKit. The RDKit drawing code can generate some really sophisticated visualizations if you're willing to hack. It reminds me of matplotlib a bit. I want to credit Christian Feldmann for writing Lasso Highlight, which is brilliant and was subsequently merged into the RDKit. The code in there is a useful example for drawing on rdkit canvases

Aromatic atoms are beige-y blue (?), conjugated bonds have a dot, HBAs are orange, and HBDs are blue. The code for a notebook is below (not incl. hba/hbd typing), or at the gist link above, and generates this:

from rdkit import Chem
from rdkit.Chem import Draw
import numpy as np
import pandas as pd
from IPython.display import SVG

def setBasicNitrogens(mol):
    nitrogens = [i[0] for i in mol.GetSubstructMatches(Chem.MolFromSmarts('N'))]
    for nidx in nitrogens:
        atom = mol.GetAtomWithIdx(nidx)
        if atom.GetIsAromatic():
            continue
        bonds = atom.GetBonds()
        conj = any([b.GetIsConjugated() for b in bonds])
        if conj:
            continue
        deg = atom.GetDegree()
        if atom.GetExplicitValence() == deg:
            atom.SetNumExplicitHs(4-deg)
            atom.SetFormalCharge(+1)
        
def setAcidicOs(mol):
    #carboxylates
    oxygens = [i[0] for i in mol.GetSubstructMatches(Chem.MolFromSmarts('[$([OD1][CX3](=[OD1]))]'))]
    for oidx in oxygens:
        atom = mol.GetAtomWithIdx(oidx)
        #atom.SetNumExplicitHs(0)
        atom.SetFormalCharge(-1)
        atom.UpdatePropertyCache()
    Chem.SanitizeMol(mol)

def describeMol(mol, hbds=[], hbas=[]):
    from rdkit.Chem import PeriodicTable
    from collections import defaultdict
    from rdkit.Chem.Draw import rdMolDraw2D
    from rdkit.Geometry import Point2D

    angles = np.linspace(0, np.pi*2, 60)
    circle_x, circle_y = np.sin(angles), np.cos(angles)
    circle = np.vstack([circle_x, circle_y]).T
    mol = Draw.PrepareMolForDrawing(mol)
    conf = mol.GetConformer(0)
    
    def get_lone_pairs(atom):
        """Credit to AstraZeneca/Jazzy for this
        """
       # set up a periodic table
        pt = Chem.GetPeriodicTable()
        symbol = atom.GetSymbol()
        valence_electrons = PeriodicTable.GetNOuterElecs(pt, symbol)
        unavailable_electrons = atom.GetExplicitValence()
        charge = atom.GetFormalCharge()
        free_electrons = valence_electrons - unavailable_electrons - charge
        return int(free_electrons / 2)

    def getAtomCoords(atom_idx):
        atom_pos = conf.GetAtomPosition(atom_idx)
        atom_pos = np.array([atom_pos.x, atom_pos.y])
        return atom_pos
        
    def drawCircle(pos, canvas, radius, color=(0.5, 0.5, 0.5), linewidth=1, filled=True):
        circle_ = circle*radius + pos
        circle_2d = [Point2D(*c) for c in circle_]
        canvas.SetFillPolys(filled)
        canvas.SetColour(color)
        canvas.SetLineWidth(linewidth)
        canvas.DrawPolygon(circle_2d)
        
    colors = {
        'Aromatic':(136/256,180/256,168/256, 0.6),
        'Conjugated':(0.5,0.8,0.8,0.4),
        'HBA':(11/256, 57/256, 235/256, 0.7),
        'HBD':(254/256, 97/256, 0/256,0.7),
    }

    d2d = rdMolDraw2D.MolDraw2DSVG(350, 350)
    d2d.drawOptions().addAtomIndices=True 
    
    for atom in mol.GetAtoms():
        if atom.GetAtomicNum() in [7,8]:
            atom.SetProp("atomNote", 'lp:'+str(get_lone_pairs(atom)))
        else:
            atom.SetProp("atomNote", "")
            
    d2d.DrawMolecule(mol, legend='')

    for atom in mol.GetAtoms():
        aidx = atom.GetIdx()
        if atom.GetIsAromatic():
            pos = getAtomCoords(aidx)
            drawCircle(pos, d2d, 0.25, colors['Aromatic'])
               
    for bond in mol.GetBonds():
        bid = bond.GetIdx()
        if bond.GetIsConjugated():
            begin_aidx = bond.GetBeginAtomIdx()
            end_aidx = bond.GetEndAtomIdx()
            begin_pos = getAtomCoords(begin_aidx)
            end_pos = getAtomCoords(end_aidx)
            pos = begin_pos/2 + end_pos/2
            drawCircle(pos, d2d, 0.10, (0.2,0.2,0.2))
            drawCircle(pos, d2d, 0.12, (1,1,1), filled=False)

    # ## If HBA and HBD indices given, add circles around them:
    for idx in hbas:
        pos = getAtomCoords(idx)
        drawCircle(pos, d2d, 0.385, colors['HBA'],filled=False,linewidth=3)
    for idx in hbds:
        pos = getAtomCoords(idx)
        drawCircle(pos, d2d, 0.515, colors['HBD'],filled=False,linewidth=3)
    
    d2d.FinishDrawing()
    text = d2d.GetDrawingText()
    return text

smi = 'CC1=C(CC(O)=O)C2=CC(CN3CCC(O)C3)=CC=C2N1C(=O)C1=CC=C(N)C=C1' #indomethacin-ish
mol = Chem.MolFromSmiles(smi)
setBasicNitrogens(mol)
setAcidicOs(mol)
text = describeMol(mol, 
                [i[0] for i in mol.GetSubstructMatches(Chem.MolFromSmarts('N'))]+[15],
                [i[0] for i in mol.GetSubstructMatches(Chem.MolFromSmarts('O'))])
SVG(text)