{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true,
    "jupyter": {
     "outputs_hidden": true
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "import matplotlib\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "import seaborn as sn\n",
    "from rdkit import Chem, DataStructs\n",
    "from rdkit.Chem import Draw\n",
    "from rdkit.Chem.Draw import IPythonConsole\n",
    "\n",
    "matplotlib.style.use(\"fivethirtyeight\")\n",
    "# Matplotlib 3.6 renamed its bundled seaborn styles to \"seaborn-v0_8-*\" and 3.8 removed\n",
    "# the old names, so use whichever spelling this installation provides.\n",
    "talk_style = \"seaborn-v0_8-talk\" if \"seaborn-v0_8-talk\" in matplotlib.style.available else \"seaborn-talk\"\n",
    "matplotlib.style.use(talk_style)\n",
    "matplotlib.rcParams['font.family'] = \"monospace\"\n",
    "matplotlib.rcParams['figure.dpi'] = 200\n",
    "matplotlib.pyplot.rcParams['savefig.facecolor'] = 'white'\n",
    "sn.set_context(\"poster\")\n",
    "IPythonConsole.ipython_useSVG = False\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "trapnell_df = pd.read_csv(\"../embeddings/trapnell_drugs_smiles.csv\", names=[\"drug\", \"smiles\", \"pathway\"])\n",
    "trapnell_df[\"smiles\"] = trapnell_df.smiles.str.strip()\n",
    "lincs_df = pd.read_csv(\"../embeddings/lincs_drugs_smiles.csv\", names=[\"drug\", \"smiles\"])\n",
    "lincs_df[\"smiles\"] = lincs_df.smiles.str.strip()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    },
    "pycharm": {
     "name": "#%%\n"
    }
   },
   "outputs": [],
   "source": [
    "def tanimoto_score(input_smiles, target_smiles):\n",
    "    \"\"\"Return the Tanimoto similarity between the RDKit fingerprints of two SMILES strings.\n",
    "\n",
    "    Args:\n",
    "        input_smiles: SMILES string of the first molecule.\n",
    "        target_smiles: SMILES string of the second molecule.\n",
    "\n",
    "    Returns:\n",
    "        The value of DataStructs.TanimotoSimilarity for the two RDKFingerprint bit vectors.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if either SMILES string cannot be parsed by RDKit.\n",
    "    \"\"\"\n",
    "    input_mol = Chem.MolFromSmiles(input_smiles)\n",
    "    target_mol = Chem.MolFromSmiles(target_smiles)\n",
    "    # MolFromSmiles returns None (it does not raise) for unparsable SMILES; fail with a\n",
    "    # readable message instead of an opaque argument error inside RDKFingerprint.\n",
    "    if input_mol is None:\n",
    "        raise ValueError(f\"Could not parse SMILES: {input_smiles!r}\")\n",
    "    if target_mol is None:\n",
    "        raise ValueError(f\"Could not parse SMILES: {target_smiles!r}\")\n",
    "    input_fp = Chem.RDKFingerprint(input_mol)\n",
    "    target_fp = Chem.RDKFingerprint(target_mol)\n",
    "    return DataStructs.TanimotoSimilarity(input_fp, target_fp)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Checking 3 hold out drugs\n",
    "Looking for the most similar drugs in LINCS to our 3 hold out drugs in Trapnell"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "jupyter": {
"outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", " | drug | \n", "smiles | \n", "pathway | \n", "
---|---|---|---|
26 | \n", "BMS-754807 | \n", "CC1(C(=O)Nc2ccc(F)nc2)CCCN1c1nc(Nc2cc(C3CC3)[n... | \n", "Protein Tyrosine Kinase | \n", "
69 | \n", "Flavopiridol | \n", "CN1CCC(c2c(O)cc(O)c3c(=O)cc(-c4ccccc4Cl)oc23)C... | \n", "Cell Cycle | \n", "
129 | \n", "Quisinostat | \n", "Cl.Cl.Cn1cc(CNCC2CCN(c3ncc(C(=O)NO)cn3)CC2)c2c... | \n", "Epigenetics | \n", "