{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "ed0cdaf6-71e1-4ef0-894f-0beabdc392cf", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import re\n", "from PIL import Image\n", "import webbrowser\n", "import json\n", "import pickle\n", "import sys \n", "import joblib\n", "import sys\n", "\n", "from rdkit import Chem\n", "from rdkit.Chem import Draw\n", "from rdkit.Chem import rdChemReactions as Reactions\n", "\n", "from compound_cacher import CompoundCacher\n", "from compound import Compound\n", "from chemaxon import *\n", "import chemaxon" ] }, { "cell_type": "code", "execution_count": 2, "id": "e64deced-2a44-4d8e-ba8f-d9843f11724a", "metadata": {}, "outputs": [], "source": [ "def load_smiles():\n", " db = pd.read_csv('./../data/cache_compounds_20160818.csv',index_col='compound_id')\n", " db_smiles = db['smiles_pH7'].to_dict()\n", " return db_smiles\n", "\n", "def load_molsig_rad1():\n", " molecular_signature_r1 = json.load(open('./../data/decompose_vector_ac.json'))\n", " return molecular_signature_r1\n", "\n", "def load_molsig_rad2():\n", " molecular_signature_r2 = json.load(open('./../data/decompose_vector_ac_r2_py3_indent_modified_manual.json'))\n", " return molecular_signature_r2\n", "\n", "def load_model():\n", " filename = './../model/M12_model_BR.pkl'\n", " loaded_model = joblib.load(open(filename, 'rb'))\n", " return loaded_model" ] }, { "cell_type": "code", "execution_count": 3, "id": "71615c14-49c3-45e7-9495-194ef22fb1ee", "metadata": {}, "outputs": [], "source": [ "db_smiles = load_smiles()\n", "molsig_r1 = load_molsig_rad1()\n", "molsig_r2 = load_molsig_rad2()\n", "loaded_model = load_model()" ] }, { "cell_type": "code", "execution_count": 4, "id": "b86b8049-cbf2-473f-8715-5e5f908193a2", "metadata": {}, "outputs": [], "source": [ "def parse_reaction_formula_side(s):\n", " \"\"\"\n", " Parses the side formula, e.g. '2 C00001 + C00002 + 3 C00003'\n", " Ignores stoichiometry.\n", "\n", " Returns:\n", " The set of CIDs.\n", " \"\"\"\n", " if s.strip() == \"null\":\n", " return {}\n", "\n", " compound_bag = {}\n", " for member in re.split('\\s+\\+\\s+', s):\n", " tokens = member.split(None, 1)\n", " if len(tokens) == 0:\n", " continue\n", " if len(tokens) == 1:\n", " amount = 1\n", " key = member\n", " else:\n", " amount = float(tokens[0])\n", " key = tokens[1]\n", "\n", " compound_bag[key] = compound_bag.get(key, 0) + amount\n", "\n", " return compound_bag\n", "\n", "def parse_formula(formula, arrow='<=>', rid=None):\n", " \"\"\"\n", " Parses a two-sided formula such as: 2 C00001 => C00002 + C00003\n", "\n", " Return:\n", " The set of substrates, products and the direction of the reaction\n", " \"\"\"\n", " tokens = formula.split(arrow)\n", " if len(tokens) < 2:\n", " print(('Reaction does not contain the arrow sign (%s): %s'\n", " % (arrow, formula)))\n", " if len(tokens) > 2:\n", " print(('Reaction contains more than one arrow sign (%s): %s'\n", " % (arrow, formula)))\n", "\n", " left = tokens[0].strip()\n", " right = tokens[1].strip()\n", "\n", " sparse_reaction = {}\n", " for cid, count in parse_reaction_formula_side(left).items():\n", " sparse_reaction[cid] = sparse_reaction.get(cid, 0) - count\n", "\n", " for cid, count in parse_reaction_formula_side(right).items():\n", " sparse_reaction[cid] = sparse_reaction.get(cid, 0) + count \n", " \n", " return sparse_reaction" ] }, { "cell_type": "code", "execution_count": 5, "id": "7342b178-3472-4734-83e3-3de431abe15e", "metadata": {}, "outputs": [], "source": [ "rxn_string = \"C00222 + C00010 + C00006 <=> C00024 + C00011 + C00005\"" ] }, { "cell_type": "code", "execution_count": 6, "id": "7b4dfe4f-48a8-4011-b201-7fb3a3268cef", "metadata": {}, "outputs": [], "source": [ "rxn_dic = parse_formula(rxn_string)" ] }, { "cell_type": "code", "execution_count": 7, "id": "1f523aa2-b9dc-4153-8c1c-dec58e1ab987", "metadata": {}, "outputs": [], "source": [ "def get_ddG0(rxn_dict,pH,I,novel_mets):\n", " ccache = CompoundCacher()\n", " # ddG0 = get_transform_ddG0(rxn_dict, ccache, pH, I, T)\n", " T = 298.15\n", " ddG0_forward = 0\n", " for compound_id, coeff in rxn_dict.items():\n", " if novel_mets != None and compound_id in novel_mets:\n", " comp = novel_mets[compound_id]\n", " else:\n", " comp = ccache.get_compound(compound_id)\n", " ddG0_forward += coeff * comp.transform_pH7(pH, I, T)\n", "\n", " return ddG0_forward" ] }, { "cell_type": "code", "execution_count": 8, "id": "33cf30ff-8b2c-4da9-9134-75a60a5c5d66", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-3.6254822995515497" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "get_ddG0(rxn_dic, 7.0, 0.1, {})" ] }, { "cell_type": "code", "execution_count": 9, "id": "9e39855d-eb9e-4ea9-aeb9-8b770cc24c8e", "metadata": {}, "outputs": [], "source": [ "def get_rule(rxn_dict, molsig1, molsig2, novel_decomposed1, novel_decomposed2):\n", " if novel_decomposed1 != None:\n", " for cid in novel_decomposed1:\n", " molsig1[cid] = novel_decomposed1[cid]\n", " if novel_decomposed2 != None:\n", " for cid in novel_decomposed2:\n", " molsig2[cid] = novel_decomposed2[cid]\n", "\n", " molsigna_df1 = pd.DataFrame.from_dict(molsig1).fillna(0)\n", " all_mets1 = molsigna_df1.columns.tolist()\n", " all_mets1.append(\"C00080\")\n", " all_mets1.append(\"C00282\")\n", "\n", " molsigna_df2 = pd.DataFrame.from_dict(molsig2).fillna(0)\n", " all_mets2 = molsigna_df2.columns.tolist()\n", " all_mets2.append(\"C00080\")\n", " all_mets2.append(\"C00282\")\n", "\n", " moieties_r1 = open('./data/group_names_r1.txt')\n", " moieties_r2 = open('./data/group_names_r2_py3_modified_manual.txt')\n", " moie_r1 = moieties_r1.read().splitlines()\n", " moie_r2 = moieties_r2.read().splitlines()\n", "\n", " molsigna_df1 = molsigna_df1.reindex(moie_r1)\n", " molsigna_df2 = molsigna_df2.reindex(moie_r2)\n", "\n", " rule_df1 = pd.DataFrame(index=molsigna_df1.index)\n", " rule_df2 = pd.DataFrame(index=molsigna_df2.index)\n", " # for rid, value in reaction_dict.items():\n", " # # skip the reactions with missing metabolites\n", " # mets = value.keys()\n", " # flag = False\n", " # for met in mets:\n", " # if met not in all_mets:\n", " # flag = True\n", " # break\n", " # if flag: continue\n", "\n", " rule_df1['change'] = 0\n", " for met, stoic in rxn_dict.items():\n", " if met == \"C00080\" or met == \"C00282\":\n", " continue # hydogen is zero\n", " rule_df1['change'] += molsigna_df1[met] * stoic\n", "\n", " rule_df2['change'] = 0\n", " for met, stoic in rxn_dict.items():\n", " if met == \"C00080\" or met == \"C00282\":\n", " continue # hydogen is zero\n", " rule_df2['change'] += molsigna_df2[met] * stoic\n", "\n", " rule_vec1 = rule_df1.to_numpy().T\n", " rule_vec2 = rule_df2.to_numpy().T\n", "\n", " m1, n1 = rule_vec1.shape\n", " m2, n2 = rule_vec2.shape\n", "\n", " zeros1 = np.zeros((m1, 44))\n", " zeros2 = np.zeros((m2, 44))\n", " X1 = np.concatenate((rule_vec1, zeros1), 1)\n", " X2 = np.concatenate((rule_vec2, zeros2), 1)\n", "\n", " rule_comb = np.concatenate((X1, X2), 1)\n", "\n", " # rule_df_final = {}\n", " # rule_df_final['rad1'] = rule_df1\n", " # rule_df_final['rad2'] = rule_df2\n", " return rule_comb, rule_df1, rule_df2\n" ] }, { "cell_type": "code", "execution_count": 14, "id": "a93ea75e-9851-45fd-aa58-d7f325b4b5a6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'C00222': -1,\n", " 'C00010': -1,\n", " 'C00006': -1,\n", " 'C00024': 1,\n", " 'C00011': 1,\n", " 'C00005': 1}" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rxn_dic" ] }, { "cell_type": "code", "execution_count": null, "id": "981948dd-db2c-4463-b983-1220353d963e", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 23, "id": "96eb1c38-2ca7-4e38-bcc4-ade1cef73852", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([-19.96775194]), array([6.66052556]))" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "loaded_model.predict(X, return_std= True)" ] }, { "cell_type": "code", "execution_count": null, "id": "81128dd3-5005-40a6-b5fe-8ecacef824bc", "metadata": {}, "outputs": [], "source": [ "def get_ddG0(rxn_dict,pH,I,novel_mets):\n", " ccache = CompoundCacher()\n", " # ddG0 = get_transform_ddG0(rxn_dict, ccache, pH, I, T)\n", " T = 298.15\n", " ddG0_forward = 0\n", " for compound_id, coeff in rxn_dict.items():\n", " if novel_mets != None and compound_id in novel_mets:\n", " comp = novel_mets[compound_id]\n", " else:\n", " comp = ccache.get_compound(compound_id)\n", " ddG0_forward += coeff * comp.transform_pH7(pH, I, T)\n", "\n", " return ddG0_forward\n", "\n", "\n", "def get_dG0(rxn_dict,rid,pH,I,loaded_model,molsig_r1, molsig_r2, novel_decomposed_r1, novel_decomposed_r2,novel_mets):\n", " rule_comb, rule_df1, rule_df2 = get_rule(rxn_dict,molsig_r1,molsig_r2, novel_decomposed_r1, novel_decomposed_r2)\n", " X = rule_comb\n", " ymean, ystd = loaded_model.predict(X, return_std=True)\n", " result = {}\n", " return ymean[0] + get_ddG0(rxn_dict, pH, I, novel_mets),ystd[0], rule_df1, rule_df2" ] }, { "cell_type": "code", "execution_count": null, "id": "751ec201-f062-4ac0-8d24-fe959636cbdc", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "c6cb1e4d-24be-42a1-b88b-793a62597c92", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "7abe24be-1653-455b-9931-9446480d39bb", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "f13433dc-51a3-41e5-8a0b-b0f21724ef98", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 2, "id": "db7c764f-d216-44a9-8f88-0e3a7c51377a", "metadata": {}, "outputs": [], "source": [ "ccc= CompoundCacher()" ] }, { "cell_type": "code", "execution_count": 3, "id": "09e6f7f2-5be7-4db3-b55d-756ecb711095", "metadata": {}, "outputs": [], "source": [ "a = ccc.get_compound('C00001')" ] }, { "cell_type": "code", "execution_count": 4, "id": "d28e44b7-d942-4739-9d7d-2f4e082ac1b9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "81.4472134155519" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a.transform_pH7(7, 0.25 , 298)" ] }, { "cell_type": "code", "execution_count": 5, "id": "1ef3fc0d-7d63-42ea-8743-522fe010a95d", "metadata": {}, "outputs": [], "source": [ "inchi_k = \"InChI=1S/C14H14O/c15-14-8-4-7-13(11-14)10-9-12-5-2-1-3-6-12/h1-8,11,15H,9-10H2\" ;" ] }, { "cell_type": "code", "execution_count": 6, "id": "4e651d1c-2c96-42d1-adab-466dc7518146", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\vuu10\\AppData\\Local\\Continuum\\anaconda3\\envs\\dGPredictor_py3\\lib\\openbabel\\__init__.py:14: UserWarning: \"import openbabel\" is deprecated, instead use \"from openbabel import openbabel\"\n", " warnings.warn('\"import openbabel\" is deprecated, instead use \"from openbabel import openbabel\"')\n" ] } ], "source": [ "c = Compound.from_inchi('Test', 'sajdf', inchi_k )" ] }, { "cell_type": "code", "execution_count": 18, "id": "6eb5c2dc-f14c-46de-889b-0e9b7faa9f79", "metadata": {}, "outputs": [ { "ename": "AttributeError", "evalue": "'Compound' object has no attribute 'smiles_ph7'", "output_type": "error", "traceback": [ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mc\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msmiles_ph7\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[1;31mAttributeError\u001b[0m: 'Compound' object has no attribute 'smiles_ph7'" ] } ], "source": [ "c.smiles_ph7()" ] }, { "cell_type": "code", "execution_count": 7, "id": "edd156dc-4355-4c2c-ba4e-6d98e776a96a", "metadata": {}, "outputs": [], "source": [ "from chemaxon import *\n", "import chemaxon" ] }, { "cell_type": "code", "execution_count": 8, "id": "880d2ef6-6b03-49d3-8f60-66769c22a84d", "metadata": {}, "outputs": [], "source": [ "pKas, major_ms_smiles = chemaxon.GetDissociationConstants(inchi_k)" ] }, { "cell_type": "code", "execution_count": 9, "id": "7a2391dc-313c-47f2-9f54-823bfdb95fcd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'OC1=CC=CC(CCC2=CC=CC=C2)=C1\\r'" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "major_ms_smiles" ] }, { "cell_type": "code", "execution_count": 10, "id": "96d90c4a-14a2-45fb-8573-97db84de2dff", "metadata": {}, "outputs": [], "source": [ "major_ms_smiles = Compound.smiles2smiles(major_ms_smiles)" ] }, { "cell_type": "code", "execution_count": 11, "id": "36d46620-b895-4ec8-85d0-7499759812c6", "metadata": {}, "outputs": [], "source": [ "MIN_PH = 0.0\n", "MAX_PH = 14.0\n", "pKas = sorted([pka for pka in pKas if pka > MIN_PH and pka < MAX_PH], reverse=True)" ] }, { "cell_type": "code", "execution_count": 12, "id": "ffccf9d9-5a52-4be6-af4c-f39b3db2a27c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[10.1]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pKas" ] }, { "cell_type": "code", "execution_count": 13, "id": "e83721fa-9a42-42ef-9a03-59fc2689c73b", "metadata": {}, "outputs": [], "source": [ "atom_bag, major_ms_charge = chemaxon.GetAtomBagAndCharge(major_ms_smiles)" ] }, { "cell_type": "code", "execution_count": null, "id": "47a87ed7-968d-44b6-a237-a8469ba3fe3b", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "49cfefde-ee96-4ca8-89af-c50f2f2ca70b", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "9b881c7b-a14a-4561-9c3c-157116efdfd0", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "10c8f915-e61a-4560-b546-fe6ea8bfdde3", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "936fafa5-1bf6-495c-be79-d4cc620f4861", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "285f9370-2fba-44c4-a36b-66c95f9f2eed", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "adbcd78f-869a-4cc9-b727-03c80df31edd", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "17fbfee9-c8b7-4644-814f-0e8aa0ad5ee9", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 21, "id": "70f90669-ff90-4bc4-955c-63672e42bb3c", "metadata": {}, "outputs": [], "source": [ "formula, formal_charge = GetFormulaAndCharge(molstring)\n", "\n", "atom_bag = {}" ] }, { "cell_type": "code", "execution_count": 25, "id": "e40e4088-c246-4afb-98ae-f92cb738e988", "metadata": {}, "outputs": [], "source": [ "for mol_formula_times in formula.split('.'):\n", " for times, mol_formula in re.findall('^(\\d+)?(\\w+)', mol_formula_times):\n", " if not times:\n", " times = 1\n", " else:\n", " times = int(times)\n", " for atom, count in re.findall(\"([A-Z][a-z]*)([0-9]*)\", mol_formula):\n", " if count == '':\n", " count = 1\n", " else:\n", " count = int(count)\n", " atom_bag[atom] = atom_bag.get(atom, 0) + count * times" ] }, { "cell_type": "code", "execution_count": 26, "id": "391cfbba-2da5-4b60-ba32-217754913b35", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'C': 14, 'H': 14, 'O': 1}" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "atom_bag" ] }, { "cell_type": "code", "execution_count": 52, "id": "812f8297-a5cc-4d63-b132-243c278c6b76", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "6\n", "1\n", "8\n" ] } ], "source": [ "from rdkit.Chem import rdchem\n", "for (elem, c) in atom_bag.items():\n", " ll = rdchem.GetPeriodicTable()\n", " atomic_num = ll.GetAtomicNumber(elem)\n", " print(atomic_num)" ] }, { "cell_type": "code", "execution_count": 55, "id": "463fcb01-2cd0-4aee-990c-946c534dc766", "metadata": {}, "outputs": [], "source": [ "\n", "n_protons = sum([c * ll.GetAtomicNumber(str(elem))\n", " for (elem, c) in atom_bag.items()])" ] }, { "cell_type": "code", "execution_count": 57, "id": "ac1c69f6-54db-41ba-9fdf-e7ab6a2dfcbc", "metadata": {}, "outputs": [], "source": [ "atom_bag['e-'] = n_protons - formal_charge" ] }, { "cell_type": "code", "execution_count": 58, "id": "61b1931e-dbaf-4e0f-afb2-6595f64d70d6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'C': 14, 'H': 14, 'O': 1, 'e-': 106}" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" } ], "source": [ "atom_bag" ] }, { "cell_type": "code", "execution_count": 60, "id": "12bdbf80-7dc5-4d47-a479-703ad5a6aa06", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 60, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "formal_charge\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "b51f36c0-707a-4856-8c23-9081e2ea2cf7", "metadata": {}, "outputs": [], "source": [ "all_pKas, smiles_list = GetDissociationConstants_val(inchi_k)" ] }, { "cell_type": "code", "execution_count": 13, "id": "6dd79761-760d-4233-b113-a34e6322a0e5", "metadata": {}, "outputs": [], "source": [ "MID_PH = 7.0\n", "N_PKAS = 20\n", "\n", "n_acidic = N_PKAS\n", "n_basic = N_PKAS\n", "pH = MID_PH" ] }, { "cell_type": "code", "execution_count": 14, "id": "6167191a-b361-4ae0-a78a-927490c72f87", "metadata": {}, "outputs": [], "source": [ "args = []\n", "if n_acidic + n_basic > 0:\n", " args += ['pka', '-a', str(n_acidic), '-b', str(n_basic),\n", " 'majorms', '-M', 'true', '--pH', str(pH)]\n" ] }, { "cell_type": "code", "execution_count": 15, "id": "dd4275ec-c71e-4b5b-bb35-de8b3c7c4883", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['pka', '-a', '20', '-b', '20', 'majorms', '-M', 'true', '--pH', '7.0']" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "args" ] }, { "cell_type": "code", "execution_count": null, "id": "79d07dc5-963a-4373-9d72-1eb6de48ede9", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 16, "id": "712a71fb-e3e3-4b01-828d-5a3862aa1b30", "metadata": {}, "outputs": [], "source": [ "logging.debug(\"INPUT: echo %s | %s\" % (inchi_k, ' '.join([CXCALC_BIN] + args)))" ] }, { "cell_type": "code", "execution_count": 17, "id": "287bf822-23b8-42de-85ca-e52678875cfa", "metadata": {}, "outputs": [], "source": [ "molstring= inchi_k" ] }, { "cell_type": "code", "execution_count": 18, "id": "4d2ff427-237c-4d63-a718-f29f12884d96", "metadata": {}, "outputs": [], "source": [ "p1 = Popen([\"echo\", molstring], stdout=PIPE, shell=use_shell_for_echo)" ] }, { "cell_type": "code", "execution_count": 19, "id": "923a09f2-b959-4837-ab1a-a858d91de0b4", "metadata": {}, "outputs": [], "source": [ "p2 = Popen([CXCALC_BIN] + args, stdin=p1.stdout,\n", " executable=CXCALC_BIN, stdout=PIPE, shell=False)" ] }, { "cell_type": "code", "execution_count": 20, "id": "a6b30545-c65a-4c56-9985-71a103b9da00", "metadata": {}, "outputs": [], "source": [ "res = p2.communicate()[0]" ] }, { "cell_type": "code", "execution_count": 21, "id": "ac059602-027f-4a1a-932f-c1339c38c7d7", "metadata": {}, "outputs": [], "source": [ "if p2.returncode != 0:\n", " raise ChemAxonError(str(args))\n", "logging.debug(\"OUTPUT: %s\" % res)" ] }, { "cell_type": "code", "execution_count": 22, "id": "671642a5-3877-44e3-b935-f987fd601444", "metadata": {}, "outputs": [], "source": [ "output = res" ] }, { "cell_type": "code", "execution_count": 23, "id": "a9f4bb4a-af86-4e97-bf1d-40c58013f90e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "b'id\\tapKa1\\tapKa2\\tapKa3\\tapKa4\\tapKa5\\tapKa6\\tapKa7\\tapKa8\\tapKa9\\tapKa10\\tapKa11\\tapKa12\\tapKa13\\tapKa14\\tapKa15\\tapKa16\\tapKa17\\tapKa18\\tapKa19\\tapKa20\\tbpKa1\\tbpKa2\\tbpKa3\\tbpKa4\\tbpKa5\\tbpKa6\\tbpKa7\\tbpKa8\\tbpKa9\\tbpKa10\\tbpKa11\\tbpKa12\\tbpKa13\\tbpKa14\\tbpKa15\\tbpKa16\\tbpKa17\\tbpKa18\\tbpKa19\\tbpKa20\\tatoms\\tmajor-ms\\r\\n1\\t10.10\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t-5.48\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t\\t15,15\\tOC1=CC=CC(CCC2=CC=CC=C2)=C1\\r\\n'" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "output" ] }, { "cell_type": "code", "execution_count": 24, "id": "215ffc9b-35a8-4f45-8f39-9c99deae6335", "metadata": {}, "outputs": [], "source": [ "atom2pKa, smiles_list = ParsePkaOutput(output, n_acidic, n_basic)" ] }, { "cell_type": "code", "execution_count": 26, "id": "21c380d3-5410-4c55-b6d7-cb0588f373ca", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['OC1=CC=CC(CCC2=CC=CC=C2)=C1\\r']" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "smiles_list" ] }, { "cell_type": "code", "execution_count": 27, "id": "1437693a-0923-4df1-837d-acb2b524fcae", "metadata": {}, "outputs": [], "source": [ "all_pKas = []\n", "for pKa_list in list(atom2pKa.values()):\n", " all_pKas += [pKa for pKa, _ in pKa_list]" ] }, { "cell_type": "code", "execution_count": 28, "id": "8e77324c-ed61-4615-a7c7-4f5ca781dc90", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[10.1, -5.48]" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "all_pKas" ] }, { "cell_type": "code", "execution_count": null, "id": "8616be46-1814-4755-b919-4b7790569890", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 }