"""Thin object-oriented wrapper around Open Babel's ``OBMol``.

Provides the ``Molecule`` class, which couples an ``OBMol`` with cached
SMILES / InChI strings and a few helpers for format conversion, SMARTS
matching and atom-level edits.
"""
import pdb
import re
import types

from openbabel import openbabel

import chemaxon
from thermodynamic_constants import default_T, default_pH


class OpenBabelError(Exception):
    """Raised when Open Babel fails to read or write a molecule."""


class Molecule(object):
    """An Open Babel molecule with lazily cached SMILES and InChI strings.

    Atom indices in this class's public methods are 0-based; they are
    shifted by one before being passed to ``OBMol.GetAtom``.
    """

    # for more rendering options visit:
    # http://www.ggasoftware.com/opensource/indigo/api/options#rendering

    # NOTE(review): OBElementTable was removed in Open Babel 3.0, while the
    # ``from openbabel import openbabel`` import style is the 3.x one --
    # confirm that the installed Open Babel build still provides this class.
    _obElements = openbabel.OBElementTable()
    # Shared, mutable pattern object: every VerifySmarts / FindSmarts call
    # re-initialises it with the pattern it is given.
    _obSmarts = openbabel.OBSmartsPattern()

    # ------------------------------------------------------------------
    # Periodic-table helpers
    # ------------------------------------------------------------------
    @staticmethod
    def GetNumberOfElements():
        """Return the number of elements known to the element table."""
        return Molecule._obElements.GetNumberOfElements()

    @staticmethod
    def GetAllElements():
        """Return the symbols for indices 0 .. GetNumberOfElements() - 1."""
        return [Molecule._obElements.GetSymbol(i)
                for i in range(Molecule.GetNumberOfElements())]

    @staticmethod
    def GetSymbol(atomic_num):
        """Return the element symbol for the given atomic number."""
        return Molecule._obElements.GetSymbol(atomic_num)

    @staticmethod
    def GetAtomicNum(elem):
        """Return the atomic number for the element symbol ``elem``."""
        # Normalise str subclasses to a plain str before handing the value
        # to the Open Babel binding (the original exact-type check followed
        # by str() was a no-op; presumably a leftover of Python 2 unicode
        # handling).
        if isinstance(elem, str):
            elem = str(elem)
        return Molecule._obElements.GetAtomicNum(elem)

    @staticmethod
    def VerifySmarts(smarts):
        """Return the result of initialising the shared pattern with ``smarts``."""
        return Molecule._obSmarts.Init(smarts)

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------
    def __init__(self):
        self.title = None
        self.obmol = openbabel.OBMol()
        self.smiles = None  # cached SMILES, filled by UpdateSmiles()
        self.inchi = None   # cached InChI, filled by UpdateInChI()

    def __str__(self):
        return self.title or self.smiles or self.inchi or ""

    def __len__(self):
        return self.GetNumAtoms()

    def Clone(self):
        """Return a new Molecule holding a copy of the OBMol and the caches."""
        tmp = Molecule()
        tmp.title = self.title
        tmp.obmol = openbabel.OBMol(self.obmol)
        tmp.smiles = self.smiles
        tmp.inchi = self.inchi
        return tmp

    def SetTitle(self, title):
        """Set the human-readable title used by ``__str__``."""
        self.title = title

    @staticmethod
    def _NewConversion(in_format=None, out_format=None):
        """Create an OBConversion with the "w" output option set.

        Args:
            in_format: Open Babel input format name, or None to leave unset.
            out_format: Open Babel output format name, or None to leave unset.

        Returns:
            The configured ``openbabel.OBConversion`` instance.
        """
        conv = openbabel.OBConversion()
        # presumably "w" reduces writer warnings -- confirm against the
        # Open Babel format documentation.
        conv.AddOption("w", conv.OUTOPTIONS)
        if in_format is not None and out_format is not None:
            conv.SetInAndOutFormats(in_format, out_format)
        elif in_format is not None:
            conv.SetInFormat(in_format)
        elif out_format is not None:
            conv.SetOutFormat(out_format)
        return conv

    @staticmethod
    def FromSmiles(smiles):
        """Build a Molecule from a SMILES string.

        Raises:
            OpenBabelError: if the string cannot be read, or the parsed
                molecule cannot be written back as SMILES.
        """
        m = Molecule()
        m.smiles = smiles
        conv = Molecule._NewConversion(in_format="smiles")
        if not conv.ReadString(m.obmol, m.smiles):
            raise OpenBabelError("Cannot read the SMILES string: " + smiles)
        try:
            # Replace the user-supplied string with Open Babel's own output.
            m.UpdateSmiles()
        except OpenBabelError as err:
            raise OpenBabelError(
                "Failed to create Molecule from SMILES: " + smiles) from err
        m.SetTitle(smiles)
        return m

    @staticmethod
    def FromInChI(inchi):
        """Build a Molecule from an InChI string.

        Raises:
            OpenBabelError: if the string cannot be read, or the parsed
                molecule cannot be written back as InChI.
        """
        m = Molecule()
        m.inchi = inchi
        conv = Molecule._NewConversion(in_format="inchi")
        # Check the read result, as FromSmiles / InChI2Smiles already do,
        # instead of continuing with a molecule that failed to parse.
        if not conv.ReadString(m.obmol, m.inchi):
            raise OpenBabelError("Cannot read the InChI string: " + inchi)
        try:
            m.UpdateInChI()
        except OpenBabelError as err:
            raise OpenBabelError(
                "Failed to create Molecule from InChI: " + inchi) from err
        m.SetTitle(inchi)
        return m

    @staticmethod
    def FromMol(mol):
        """Build a Molecule from the text of a MOL file.

        Raises:
            OpenBabelError: if the text cannot be read, or the parsed
                molecule cannot be written as InChI / SMILES.
        """
        m = Molecule()
        conv = Molecule._NewConversion(in_format="mol")
        if not conv.ReadString(m.obmol, mol):
            raise OpenBabelError("Cannot read the MOL file:\n" + mol)
        try:
            m.UpdateInChI()
            m.UpdateSmiles()
        except OpenBabelError as err:
            raise OpenBabelError(
                "Failed to create Molecule from MOL file:\n" + mol) from err
        m.SetTitle("")
        return m

    @staticmethod
    def FromOBMol(obmol):
        """Wrap an existing OBMol (not copied) in a Molecule.

        Raises:
            OpenBabelError: if the molecule cannot be written as
                InChI / SMILES.
        """
        m = Molecule()
        m.obmol = obmol
        try:
            m.UpdateInChI()
            m.UpdateSmiles()
        except OpenBabelError as err:
            raise OpenBabelError(
                "Failed to create Molecule from OBMol") from err
        m.SetTitle("")
        return m

    @staticmethod
    def _FromFormat(s, fmt='inchi'):
        """Dispatch to the matching ``From*`` constructor.

        Returns:
            A Molecule, or None when ``fmt`` is not one of
            'smiles', 'smi', 'inchi', 'mol', 'obmol'.
        """
        if fmt in ('smiles', 'smi'):
            return Molecule.FromSmiles(s)
        if fmt == 'inchi':
            return Molecule.FromInChI(s)
        if fmt == 'mol':
            return Molecule.FromMol(s)
        if fmt == 'obmol':
            return Molecule.FromOBMol(s)
        return None

    # ------------------------------------------------------------------
    # Format conversion
    # ------------------------------------------------------------------
    @staticmethod
    def _ToFormat(obmol, fmt='inchi'):
        """Write ``obmol`` in the Open Babel output format ``fmt``.

        Returns:
            For 'smiles' / 'smi': the first whitespace-separated token of
            the output. For 'inchi': the output stripped of surrounding
            whitespace. For any other format: the raw output.

        Raises:
            OpenBabelError: if Open Babel produces no output.
        """
        conv = Molecule._NewConversion(out_format=fmt)
        res = conv.WriteString(obmol)
        if not res:
            raise OpenBabelError("Cannot convert OBMol to %s" % fmt)
        if fmt in ('smiles', 'smi'):
            tokens = res.split()
            # Output may be non-empty yet contain only whitespace.
            if not tokens:
                raise OpenBabelError("Cannot convert OBMol to %s" % fmt)
            return tokens[0]
        if fmt == 'inchi':
            return res.strip()
        return res

    @staticmethod
    def Smiles2InChI(smiles):
        """Convert a SMILES string to InChI without building a Molecule.

        Raises:
            OpenBabelError: if the SMILES string cannot be read.
        """
        conv = Molecule._NewConversion(in_format="smiles", out_format="inchi")
        obmol = openbabel.OBMol()
        if not conv.ReadString(obmol, smiles):
            raise OpenBabelError("Cannot read the SMILES string: " + smiles)
        return conv.WriteString(obmol).strip()

    @staticmethod
    def InChI2Smiles(inchi):
        """Convert an InChI string to SMILES without building a Molecule.

        Raises:
            OpenBabelError: if the InChI string cannot be read, or Open
                Babel produces no SMILES output.
        """
        conv = Molecule._NewConversion(in_format="inchi", out_format="smiles")
        obmol = openbabel.OBMol()
        if not conv.ReadString(obmol, inchi):
            raise OpenBabelError("Cannot read the InChI string: " + inchi)
        tokens = conv.WriteString(obmol).split()
        # Empty output used to surface as a bare IndexError; report it the
        # same way _ToFormat does.
        if not tokens:
            raise OpenBabelError("Cannot convert OBMol to %s" % "smiles")
        return tokens[0]

    # ------------------------------------------------------------------
    # Editing
    # ------------------------------------------------------------------
    def RemoveHydrogens(self):
        """Call ``DeleteHydrogens`` on the underlying OBMol."""
        self.obmol.DeleteHydrogens()

    def RemoveAtoms(self, indices):
        """Delete the atoms at the given 0-based indices.

        The cached SMILES and InChI are invalidated.
        """
        self.obmol.BeginModify()
        # Delete from the highest index down so that earlier deletions do
        # not shift the positions of atoms still waiting to be removed.
        for i in sorted(indices, reverse=True):
            self.obmol.DeleteAtom(self.obmol.GetAtom(i + 1))
        self.obmol.EndModify()
        self.smiles = None
        self.inchi = None

    def SetAtomicNum(self, index, new_atomic_num):
        """Change the element of the atom at 0-based ``index``.

        The cached SMILES and InChI are invalidated.
        """
        self.obmol.GetAtom(index + 1).SetAtomicNum(new_atomic_num)
        self.smiles = None
        self.inchi = None

    # ------------------------------------------------------------------
    # Accessors
    # ------------------------------------------------------------------
    def ToOBMol(self):
        """Return the underlying OBMol (not a copy)."""
        return self.obmol

    def ToFormat(self, fmt='inchi'):
        """Return this molecule written in the given format (see _ToFormat)."""
        return Molecule._ToFormat(self.obmol, fmt=fmt)

    def ToMolfile(self):
        """Return this molecule written in 'mol' format."""
        return self.ToFormat('mol')

    def UpdateInChI(self):
        """Recompute and cache the InChI string."""
        self.inchi = Molecule._ToFormat(self.obmol, 'inchi')

    def ToInChI(self):
        """
            Lazy storage of the InChI identifier (calculate once only when
            asked for and store for later use).
        """
        if not self.inchi:
            self.UpdateInChI()
        return self.inchi

    def UpdateSmiles(self):
        """Recompute and cache the SMILES string."""
        self.smiles = Molecule._ToFormat(self.obmol, 'smiles')

    def ToSmiles(self):
        """
            Lazy storage of the SMILES identifier (calculate once only when
            asked for and store for later use).
        """
        if not self.smiles:
            self.UpdateSmiles()
        return self.smiles

    def GetFormula(self):
        """Extract the formula layer from this molecule's InChI.

        Returns:
            The formula string, or '' when the InChI has no match.

        Raises:
            ValueError: if the pattern matches more than once.
        """
        # Raw string: '\.' in a normal literal is an invalid escape sequence.
        tokens = re.findall(r'InChI=1S?/([0-9A-Za-z\.]+)', self.ToInChI())
        if len(tokens) == 1:
            return tokens[0]
        if len(tokens) > 1:
            raise ValueError('Bad InChI: ' + self.ToInChI())
        return ''

    def GetExactMass(self):
        """Return ``OBMol.GetExactMass()`` for this molecule."""
        return self.obmol.GetExactMass()

    def GetAtomBagAndCharge(self):
        """Return ``chemaxon.GetAtomBagAndCharge`` for this molecule's InChI.

        Returns:
            A pair (atom_bag, charge); atom_bag is used by callers in this
            class as a dict keyed by element symbol.
        """
        inchi = self.ToInChI()
        atom_bag, major_ms_charge = chemaxon.GetAtomBagAndCharge(inchi)
        return atom_bag, major_ms_charge

    def GetHydrogensAndCharge(self):
        """Return (number of 'H' entries in the atom bag, charge)."""
        atom_bag, charge = self.GetAtomBagAndCharge()
        return atom_bag.get('H', 0), charge

    def GetNumElectrons(self):
        """Calculates the number of electrons in a given molecule."""
        atom_bag, _ = self.GetAtomBagAndCharge()
        return atom_bag.get('e-', 0)

    def GetNumAtoms(self):
        """Return the number of atoms in the underlying OBMol."""
        return self.obmol.NumAtoms()

    def GetAtoms(self):
        """Return the list of OBAtom objects, in index order."""
        return [self.obmol.GetAtom(i + 1)
                for i in range(self.obmol.NumAtoms())]

    def FindSmarts(self, smarts):
        """Find all matches of a SMARTS pattern in this molecule.

        The indices reported by the pattern's map list are shifted down by
        one so that they are 0-based, like the other indices in this class.

        Args:
            smarts: the SMARTS query to search for.

        Returns:
            A list of matches, each a list of 0-based atom indices; an
            empty list when there is no match.
        """
        Molecule._obSmarts.Init(smarts)
        if not Molecule._obSmarts.Match(self.obmol):
            return []
        match_list = Molecule._obSmarts.GetMapList()
        return [[n - 1 for n in match] for match in match_list]

    def GetAtomCharges(self):
        """
            Returns:
                A list of charges, according to the number of atoms
                in the molecule
        """
        return [atom.GetFormalCharge() for atom in self.GetAtoms()]


if __name__ == '__main__':
    mol = Molecule.FromInChI('InChI=1/C5H10O2/c1-3-5(6)7-4-2/h3-4H2,1-2H3')
    print(mol.GetExactMass())