{ "cells": [ { "cell_type": "markdown", "id": "45d54f76-45c5-46c1-aeb2-dde40c63e8fc", "metadata": {}, "source": [ "**Requirements** \n", "* According to this [paper](https://arxiv.org/pdf/1904.01561.pdf), features are computed with [descriptastorus](https://github.com/bp-kelley/descriptastorus) package\n", "* Install via: `pip install git+https://github.com/bp-kelley/descriptastorus`" ] }, { "cell_type": "markdown", "id": "fa137ded", "metadata": {}, "source": [ "## General imports" ] }, { "cell_type": "code", "execution_count": 4, "id": "6c950b63", "metadata": {}, "outputs": [], "source": [ "import sys\n", "\n", "# this depends on the notebook depth and must be adapted per notebook\n", "sys.path.insert(0, \"/\") \n", "from chemCPA.paths import DATA_DIR, EMBEDDING_DIR" ] }, { "cell_type": "code", "execution_count": 5, "id": "4643260d", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from joblib import Parallel, delayed\n", "from tqdm.notebook import tqdm" ] }, { "cell_type": "markdown", "id": "db8608d6", "metadata": {}, "source": [ "## Load Smiles list" ] }, { "cell_type": "code", "execution_count": 6, "id": "db1601d1", "metadata": {}, "outputs": [], "source": [ "import scanpy as sc\n", "from chemCPA.helper import canonicalize_smiles" ] }, { "cell_type": "code", "execution_count": 7, "id": "842d22c4", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/nfs/staff-hdd/hetzell/miniconda3/envs/chemical_CPA/lib/python3.7/site-packages/anndata/_core/anndata.py:1828: UserWarning: Observation names are not unique. 
To make them unique, call `.obs_names_make_unique`.\n", " utils.warn_names_duplicates(\"obs\")\n" ] } ], "source": [ "adata = sc.read(DATA_DIR/ \"adata_biolord_split_30.h5ad\")" ] }, { "cell_type": "code", "execution_count": 8, "id": "734a3db6", "metadata": {}, "outputs": [], "source": [ "# Canonicalise each unique SMILES, skipping missing entries (the string \"nan\" or a real NaN).\n", "raw_smiles = adata.obs[\"smiles\"].unique()\n", "canonical_smiles = [canonicalize_smiles(s) for s in raw_smiles if isinstance(s, str) and s != \"nan\"]\n", "# Distinct raw strings can share one canonical form; keep only the first occurrence so the\n", "# SMILES stay usable as a unique DataFrame index (dict preserves insertion order).\n", "smiles_list = list(dict.fromkeys(canonical_smiles))" ] }, { "cell_type": "code", "execution_count": 9, "id": "653d99cf", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of smiles strings: 186\n" ] } ], "source": [ "print(f'Number of smiles strings: {len(smiles_list)}')" ] }, { "cell_type": "code", "execution_count": 10, "id": "a5dc19a2-d321-49e6-a62d-e6024073146e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "RDKit2D_calculated(bool)\n", "BalabanJ(float64)\n", "BertzCT(float64)\n", "Chi0(float64)\n", "Chi0n(float64)\n", "Chi0v(float64)\n", "Chi1(float64)\n", "Chi1n(float64)\n", "Chi1v(float64)\n", "Chi2n(float64)\n", "Chi2v(float64)\n", "Chi3n(float64)\n", "Chi3v(float64)\n", "Chi4n(float64)\n", "Chi4v(float64)\n", "EState_VSA1(float64)\n", "EState_VSA10(float64)\n", "EState_VSA11(float64)\n", "EState_VSA2(float64)\n", "EState_VSA3(float64)\n", "EState_VSA4(float64)\n", "EState_VSA5(float64)\n", "EState_VSA6(float64)\n", "EState_VSA7(float64)\n", "EState_VSA8(float64)\n", "EState_VSA9(float64)\n", "ExactMolWt(float64)\n", "FpDensityMorgan1(float64)\n", "FpDensityMorgan2(float64)\n", "FpDensityMorgan3(float64)\n", "FractionCSP3(float64)\n", "HallKierAlpha(float64)\n", "HeavyAtomCount(float64)\n", "HeavyAtomMolWt(float64)\n", "Ipc(float64)\n", "Kappa1(float64)\n", "Kappa2(float64)\n", "Kappa3(float64)\n", "LabuteASA(float64)\n", "MaxAbsEStateIndex(float64)\n", "MaxAbsPartialCharge(float64)\n", "MaxEStateIndex(float64)\n", "MaxPartialCharge(float64)\n", "MinAbsEStateIndex(float64)\n", 
"MinAbsPartialCharge(float64)\n", "MinEStateIndex(float64)\n", "MinPartialCharge(float64)\n", "MolLogP(float64)\n", "MolMR(float64)\n", "MolWt(float64)\n", "NHOHCount(float64)\n", "NOCount(float64)\n", "NumAliphaticCarbocycles(float64)\n", "NumAliphaticHeterocycles(float64)\n", "NumAliphaticRings(float64)\n", "NumAromaticCarbocycles(float64)\n", "NumAromaticHeterocycles(float64)\n", "NumAromaticRings(float64)\n", "NumHAcceptors(float64)\n", "NumHDonors(float64)\n", "NumHeteroatoms(float64)\n", "NumRadicalElectrons(float64)\n", "NumRotatableBonds(float64)\n", "NumSaturatedCarbocycles(float64)\n", "NumSaturatedHeterocycles(float64)\n", "NumSaturatedRings(float64)\n", "NumValenceElectrons(float64)\n", "PEOE_VSA1(float64)\n", "PEOE_VSA10(float64)\n", "PEOE_VSA11(float64)\n", "PEOE_VSA12(float64)\n", "PEOE_VSA13(float64)\n", "PEOE_VSA14(float64)\n", "PEOE_VSA2(float64)\n", "PEOE_VSA3(float64)\n", "PEOE_VSA4(float64)\n", "PEOE_VSA5(float64)\n", "PEOE_VSA6(float64)\n", "PEOE_VSA7(float64)\n", "PEOE_VSA8(float64)\n", "PEOE_VSA9(float64)\n", "RingCount(float64)\n", "SMR_VSA1(float64)\n", "SMR_VSA10(float64)\n", "SMR_VSA2(float64)\n", "SMR_VSA3(float64)\n", "SMR_VSA4(float64)\n", "SMR_VSA5(float64)\n", "SMR_VSA6(float64)\n", "SMR_VSA7(float64)\n", "SMR_VSA8(float64)\n", "SMR_VSA9(float64)\n", "SlogP_VSA1(float64)\n", "SlogP_VSA10(float64)\n", "SlogP_VSA11(float64)\n", "SlogP_VSA12(float64)\n", "SlogP_VSA2(float64)\n", "SlogP_VSA3(float64)\n", "SlogP_VSA4(float64)\n", "SlogP_VSA5(float64)\n", "SlogP_VSA6(float64)\n", "SlogP_VSA7(float64)\n", "SlogP_VSA8(float64)\n", "SlogP_VSA9(float64)\n", "TPSA(float64)\n", "VSA_EState1(float64)\n", "VSA_EState10(float64)\n", "VSA_EState2(float64)\n", "VSA_EState3(float64)\n", "VSA_EState4(float64)\n", "VSA_EState5(float64)\n", "VSA_EState6(float64)\n", "VSA_EState7(float64)\n", "VSA_EState8(float64)\n", "VSA_EState9(float64)\n", "fr_Al_COO(float64)\n", "fr_Al_OH(float64)\n", "fr_Al_OH_noTert(float64)\n", "fr_ArN(float64)\n", 
"fr_Ar_COO(float64)\n", "fr_Ar_N(float64)\n", "fr_Ar_NH(float64)\n", "fr_Ar_OH(float64)\n", "fr_COO(float64)\n", "fr_COO2(float64)\n", "fr_C_O(float64)\n", "fr_C_O_noCOO(float64)\n", "fr_C_S(float64)\n", "fr_HOCCN(float64)\n", "fr_Imine(float64)\n", "fr_NH0(float64)\n", "fr_NH1(float64)\n", "fr_NH2(float64)\n", "fr_N_O(float64)\n", "fr_Ndealkylation1(float64)\n", "fr_Ndealkylation2(float64)\n", "fr_Nhpyrrole(float64)\n", "fr_SH(float64)\n", "fr_aldehyde(float64)\n", "fr_alkyl_carbamate(float64)\n", "fr_alkyl_halide(float64)\n", "fr_allylic_oxid(float64)\n", "fr_amide(float64)\n", "fr_amidine(float64)\n", "fr_aniline(float64)\n", "fr_aryl_methyl(float64)\n", "fr_azide(float64)\n", "fr_azo(float64)\n", "fr_barbitur(float64)\n", "fr_benzene(float64)\n", "fr_benzodiazepine(float64)\n", "fr_bicyclic(float64)\n", "fr_diazo(float64)\n", "fr_dihydropyridine(float64)\n", "fr_epoxide(float64)\n", "fr_ester(float64)\n", "fr_ether(float64)\n", "fr_furan(float64)\n", "fr_guanido(float64)\n", "fr_halogen(float64)\n", "fr_hdrzine(float64)\n", "fr_hdrzone(float64)\n", "fr_imidazole(float64)\n", "fr_imide(float64)\n", "fr_isocyan(float64)\n", "fr_isothiocyan(float64)\n", "fr_ketone(float64)\n", "fr_ketone_Topliss(float64)\n", "fr_lactam(float64)\n", "fr_lactone(float64)\n", "fr_methoxy(float64)\n", "fr_morpholine(float64)\n", "fr_nitrile(float64)\n", "fr_nitro(float64)\n", "fr_nitro_arom(float64)\n", "fr_nitro_arom_nonortho(float64)\n", "fr_nitroso(float64)\n", "fr_oxazole(float64)\n", "fr_oxime(float64)\n", "fr_para_hydroxylation(float64)\n", "fr_phenol(float64)\n", "fr_phenol_noOrthoHbond(float64)\n", "fr_phos_acid(float64)\n", "fr_phos_ester(float64)\n", "fr_piperdine(float64)\n", "fr_piperzine(float64)\n", "fr_priamide(float64)\n", "fr_prisulfonamd(float64)\n", "fr_pyridine(float64)\n", "fr_quatN(float64)\n", "fr_sulfide(float64)\n", "fr_sulfonamd(float64)\n", "fr_sulfone(float64)\n", "fr_term_acetylene(float64)\n", "fr_tetrazole(float64)\n", "fr_thiazole(float64)\n", 
"fr_thiocyan(float64)\n", "fr_thiophene(float64)\n", "fr_unbrch_alkane(float64)\n", "fr_urea(float64)\n", "qed(float64)\n" ] } ], "source": [ "from descriptastorus.descriptors.DescriptorGenerator import MakeGenerator\n", "generator = MakeGenerator((\"RDKit2D\",))\n", "for name, numpy_type in generator.GetColumns():\n", " print(f\"{name}({numpy_type.__name__})\")" ] }, { "cell_type": "code", "execution_count": 11, "id": "003cc588-e4dd-4dcc-98ec-4d3fcdd5432b", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3cdbc7fd2ace48aa960a1f81738956bc", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/186 [00:00, ?it/s]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "n_jobs = 16\n", "data = Parallel(n_jobs=n_jobs)(delayed(generator.process)(smiles) for smiles in tqdm(smiles_list, position=0, leave=True) )" ] }, { "cell_type": "code", "execution_count": 12, "id": "055b3661", "metadata": {}, "outputs": [], "source": [ "# Fail loudly if a molecule yielded no result at all, instead of an obscure TypeError when slicing None.\n", "failed_smiles = [s for s, d in zip(smiles_list, data) if d is None]\n", "assert not failed_smiles, f'No descriptors computed for: {failed_smiles}'\n", "\n", "# Each result starts with the RDKit2D_calculated flag (first entry of generator.GetColumns());\n", "# keep only the descriptors. Slicing from the end instead of `d[1:]` makes this cell idempotent:\n", "# re-running it no longer strips a real descriptor.\n", "n_descriptors = len(generator.GetColumns()) - 1\n", "data = [d[-n_descriptors:] for d in data]" ] }, { "cell_type": "code", "execution_count": 13, "id": "ba76cf35-7e75-4b53-8e82-9ea09a90f01a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(186, 200)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embedding = np.array(data)\n", "embedding.shape" ] }, { "cell_type": "markdown", "id": "f784e11a-82e8-4b63-90c0-5b835bd1f1a5", "metadata": {}, "source": [ "## Check `nans` and `infs`" ] }, { "cell_type": "markdown", "id": "9296ab55-04e8-47c7-8624-bf68330c5553", "metadata": {}, "source": [ "Check for `nans`" ] }, { "cell_type": "code", "execution_count": 14, "id": "71e23bf2-e2dc-4cf7-a2f2-253a3a35ff99", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "drug_idx:\n", " []\n", "feature_idx:\n", " []\n" ] } ], "source": [ "drug_idx, feature_idx = np.where(np.isnan(embedding))\n", "print(f'drug_idx:\\n {drug_idx}')\n", "print(f'feature_idx:\\n {feature_idx}')" 
] }, { "cell_type": "markdown", "id": "ac8119cb-4f13-4498-a21d-3c100727ec71", "metadata": {}, "source": [ "Check for `infs` and add to idx lists" ] }, { "cell_type": "code", "execution_count": 15, "id": "5ed78575-5351-433a-934d-fb969f33a9a6", "metadata": {}, "outputs": [], "source": [ "drug_idx_infs, feature_idx_infs = np.where(np.isinf(embedding))\n", "\n", "drug_idx = np.concatenate((drug_idx, drug_idx_infs))\n", "feature_idx = np.concatenate((feature_idx, feature_idx_infs))" ] }, { "cell_type": "markdown", "id": "ed64de53-ff22-4f86-839e-9fb70c0558db", "metadata": {}, "source": [ "Features that have these invalid values:" ] }, { "cell_type": "code", "execution_count": 16, "id": "eba4e112-e676-4816-9fe4-d2ba9b464b1d", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/plain": [ "array([], shape=(0, 2), dtype=object)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# `embedding` no longer contains the leading RDKit2D_calculated column, so skip that first\n", "# generator column; otherwise every reported feature name is shifted by one.\n", "np.array(generator.GetColumns())[1:][np.unique(feature_idx)]" ] }, { "cell_type": "markdown", "id": "a851a96f-27d3-42b6-a74c-db5c6fa6a257", "metadata": {}, "source": [ "Set values to `0`" ] }, { "cell_type": "code", "execution_count": 17, "id": "0a7fb00f-468b-4957-96cc-ae0974c54780", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([], dtype=float64)" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "embedding[drug_idx, feature_idx] " ] }, { "cell_type": "code", "execution_count": 18, "id": "1ee85449-5515-4d93-94ae-b3a4845e088b", "metadata": {}, "outputs": [], "source": [ "embedding[drug_idx, feature_idx] = 0" ] }, { "cell_type": "markdown", "id": "cf768d83", "metadata": {}, "source": [ "## Save" ] }, { "cell_type": "code", "execution_count": 19, "id": "a6291a01", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Deleting columns with std<=0.01: ['latent_60', 'latent_89', 'latent_102', 'latent_137', 'latent_145', 'latent_146', 'latent_147', 'latent_149', 
'latent_151', 'latent_152', 'latent_159', 'latent_160', 'latent_163', 'latent_164', 'latent_167', 'latent_174', 'latent_177', 'latent_182', 'latent_186', 'latent_188', 'latent_193', 'latent_195']\n" ] } ], "source": [ "import pandas as pd\n", "\n", "# The RDKit2D_calculated flag was already stripped from `data` before `embedding` was built, so every\n", "# column here is a real descriptor: `latent_i` holds generator.GetColumns()[i + 1].\n", "# Do NOT drop `latent_0` - that would discard the first real descriptor (BalabanJ).\n", "df = pd.DataFrame(data=embedding, index=smiles_list, columns=[f'latent_{i}' for i in range(embedding.shape[1])])\n", "\n", "# Drop (near-)constant columns: standard deviation <= threshold.\n", "# Select them by label from a single std computation rather than rebuilding names from positions.\n", "threshold = 0.01\n", "column_std = df.std()\n", "columns = column_std[column_std <= threshold].index.tolist()\n", "print(f'Deleting columns with std<={threshold}: {columns}')\n", "df.drop(columns=columns, inplace=True)" ] }, { "cell_type": "markdown", "id": "0f14068e-51b8-40a9-b9c6-043c12b082ee", "metadata": {}, "source": [ "Check that correct columns were deleted: " ] }, { "cell_type": "code", "execution_count": 20, "id": "c92f8a87-ce38-4309-a6c4-4e5b828b59c7", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array([], dtype=int64),)" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.where(df.std() <= threshold)" ] }, { "cell_type": "markdown", "id": "2d0d8c60-79ae-4451-ae5c-321f533bed48", "metadata": {}, "source": [ "### Normalise dataframe" ] }, { "cell_type": "code", "execution_count": 21, "id": "721590cd-67bb-4c70-bef4-268fbfa9a7cc", "metadata": {}, "outputs": [], "source": [ "normalized_df=(df-df.mean())/df.std()" ] }, { "cell_type": "code", "execution_count": 22, "id": "f4b63954-a11e-4384-945d-c94e2b629026", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | latent_1 | \n", "latent_2 | \n", "latent_3 | \n", "latent_4 | \n", "latent_5 | \n", "latent_6 | \n", "latent_7 | \n", "latent_8 | \n", "latent_9 | \n", "latent_10 | \n", "... | \n", "latent_187 | \n", "latent_189 | \n", "latent_190 | \n", "latent_191 | \n", "latent_192 | \n", "latent_194 | \n", "latent_196 | \n", "latent_197 | \n", "latent_198 | \n", "latent_199 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
O=C([O-])CCCc1ccccc1.[Na+] | \n", "-1.666455 | \n", "-1.600675 | \n", "-1.385933 | \n", "-1.134785 | \n", "-1.605154 | \n", "-1.460731 | \n", "-1.536649 | \n", "-1.465486 | \n", "-1.543553 | \n", "-1.452940 | \n", "... | \n", "-0.535847 | \n", "-0.19722 | \n", "-0.273387 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "0.121276 | \n", "
CN1CCN(c2cccc(Nc3nc4c(-c5ccc(S(C)(=O)=O)cc5)cccn4n3)c2)CC1 | \n", "0.863033 | \n", "0.376632 | \n", "0.455178 | \n", "0.475603 | \n", "0.463247 | \n", "0.462348 | \n", "0.777506 | \n", "0.503243 | \n", "0.879424 | \n", "0.503081 | \n", "... | \n", "1.457503 | \n", "-0.19722 | \n", "-0.273387 | \n", "3.977884 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "0.060135 | \n", "
COc1ccc2cc(-c3nc(-c4ccc(S(C)=O)cc4)[nH]c3-c3ccncc3)ccc2c1 | \n", "0.944786 | \n", "0.248064 | \n", "0.295501 | \n", "0.318775 | \n", "0.412792 | \n", "0.268612 | \n", "0.506380 | \n", "0.178487 | \n", "0.384114 | \n", "0.284651 | \n", "... | \n", "1.457503 | \n", "-0.19722 | \n", "-0.273387 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "-0.496321 | \n", "
COc1cc(Nc2c(C#N)cnc3cc(OCCCN4CCN(C)CC4)c(OC)cc23)c(Cl)cc1Cl | \n", "0.603507 | \n", "0.746555 | \n", "0.789836 | \n", "0.920548 | \n", "0.783927 | \n", "0.701164 | \n", "0.700273 | \n", "0.582936 | \n", "0.573394 | \n", "0.651072 | \n", "... | \n", "1.457503 | \n", "-0.19722 | \n", "-0.273387 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "0.628009 | \n", "-0.284531 | \n", "-0.508369 | \n", "
COc1cc(C=CC(=O)CC(=O)C=Cc2ccc(O)c(OC)c2)ccc1O | \n", "-0.393464 | \n", "-0.067916 | \n", "-0.163103 | \n", "-0.268156 | \n", "-0.140663 | \n", "-0.313933 | \n", "-0.456744 | \n", "-0.450171 | \n", "-0.621424 | \n", "-0.534237 | \n", "... | \n", "-0.535847 | \n", "-0.19722 | \n", "-0.273387 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "0.379172 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
CCS(=O)(=O)c1cccc(-c2cc(C(=O)NC3CCN(C)CC3)c(C)c3[nH]c4ncc(C)cc4c23)c1 | \n", "1.286344 | \n", "0.740428 | \n", "0.887842 | \n", "0.900547 | \n", "0.732054 | \n", "0.910649 | \n", "1.136028 | \n", "1.014856 | \n", "1.426770 | \n", "1.140227 | \n", "... | \n", "1.457503 | \n", "-0.19722 | \n", "-0.273387 | \n", "3.977884 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "-0.325822 | \n", "
CCN(CC)Cc1ccc2cc(COC(=O)Nc3ccc(C(=O)NO)cc3)ccc2c1.Cl.O | \n", "0.123124 | \n", "0.267321 | \n", "0.453349 | \n", "0.473807 | \n", "0.289093 | \n", "0.295629 | \n", "0.117262 | \n", "0.090182 | \n", "-0.130664 | \n", "0.137535 | \n", "... | \n", "-0.535847 | \n", "-0.19722 | \n", "-0.273387 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "-0.746358 | \n", "
Cl.Cl.Cn1cc(CNCC2CCN(c3ncc(C(=O)NO)cn3)CC2)c2ccccc21 | \n", "-0.028239 | \n", "-0.011372 | \n", "0.230844 | \n", "0.391784 | \n", "0.111804 | \n", "0.200441 | \n", "0.027626 | \n", "0.214944 | \n", "-0.017353 | \n", "0.364936 | \n", "... | \n", "-0.535847 | \n", "-0.19722 | \n", "-0.273387 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "-0.495217 | \n", "
CCS(=O)(=O)Nc1ccc2[nH]c(O)c(C(=Nc3ccc(CN4CCCCC4)cc3)c3ccccc3)c2c1 | \n", "1.115222 | \n", "0.770763 | \n", "0.847426 | \n", "0.860852 | \n", "0.884172 | \n", "0.997570 | \n", "1.217879 | \n", "0.869661 | \n", "1.210711 | \n", "0.882431 | \n", "... | \n", "-0.535847 | \n", "-0.19722 | \n", "3.638151 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "-1.140448 | \n", "
CN1CCC(c2c(O)cc(O)c3c(=O)cc(-c4ccccc4Cl)oc23)C(O)C1.Cl | \n", "0.252492 | \n", "-0.041707 | \n", "-0.056939 | \n", "0.099009 | \n", "-0.050176 | \n", "-0.053351 | \n", "-0.110797 | \n", "0.095547 | \n", "0.002473 | \n", "0.210616 | \n", "... | \n", "-0.535847 | \n", "-0.19722 | \n", "-0.273387 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "0.466665 | \n", "
186 rows × 177 columns
\n", "\n", " | latent_1 | \n", "latent_2 | \n", "latent_3 | \n", "latent_4 | \n", "latent_5 | \n", "latent_6 | \n", "latent_7 | \n", "latent_8 | \n", "latent_9 | \n", "latent_10 | \n", "... | \n", "latent_187 | \n", "latent_189 | \n", "latent_190 | \n", "latent_191 | \n", "latent_192 | \n", "latent_194 | \n", "latent_196 | \n", "latent_197 | \n", "latent_198 | \n", "latent_199 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
O=C([O-])CCCc1ccccc1.[Na+] | \n", "-1.666455 | \n", "-1.600675 | \n", "-1.385933 | \n", "-1.134785 | \n", "-1.605154 | \n", "-1.460731 | \n", "-1.536649 | \n", "-1.465486 | \n", "-1.543553 | \n", "-1.452940 | \n", "... | \n", "-0.535847 | \n", "-0.19722 | \n", "-0.273387 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "0.121276 | \n", "
CN1CCN(c2cccc(Nc3nc4c(-c5ccc(S(C)(=O)=O)cc5)cccn4n3)c2)CC1 | \n", "0.863033 | \n", "0.376632 | \n", "0.455178 | \n", "0.475603 | \n", "0.463247 | \n", "0.462348 | \n", "0.777506 | \n", "0.503243 | \n", "0.879424 | \n", "0.503081 | \n", "... | \n", "1.457503 | \n", "-0.19722 | \n", "-0.273387 | \n", "3.977884 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "0.060135 | \n", "
COc1ccc2cc(-c3nc(-c4ccc(S(C)=O)cc4)[nH]c3-c3ccncc3)ccc2c1 | \n", "0.944786 | \n", "0.248064 | \n", "0.295501 | \n", "0.318775 | \n", "0.412792 | \n", "0.268612 | \n", "0.506380 | \n", "0.178487 | \n", "0.384114 | \n", "0.284651 | \n", "... | \n", "1.457503 | \n", "-0.19722 | \n", "-0.273387 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "-0.496321 | \n", "
COc1cc(Nc2c(C#N)cnc3cc(OCCCN4CCN(C)CC4)c(OC)cc23)c(Cl)cc1Cl | \n", "0.603507 | \n", "0.746555 | \n", "0.789836 | \n", "0.920548 | \n", "0.783927 | \n", "0.701164 | \n", "0.700273 | \n", "0.582936 | \n", "0.573394 | \n", "0.651072 | \n", "... | \n", "1.457503 | \n", "-0.19722 | \n", "-0.273387 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "0.628009 | \n", "-0.284531 | \n", "-0.508369 | \n", "
COc1cc(C=CC(=O)CC(=O)C=Cc2ccc(O)c(OC)c2)ccc1O | \n", "-0.393464 | \n", "-0.067916 | \n", "-0.163103 | \n", "-0.268156 | \n", "-0.140663 | \n", "-0.313933 | \n", "-0.456744 | \n", "-0.450171 | \n", "-0.621424 | \n", "-0.534237 | \n", "... | \n", "-0.535847 | \n", "-0.19722 | \n", "-0.273387 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "0.379172 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
CCS(=O)(=O)c1cccc(-c2cc(C(=O)NC3CCN(C)CC3)c(C)c3[nH]c4ncc(C)cc4c23)c1 | \n", "1.286344 | \n", "0.740428 | \n", "0.887842 | \n", "0.900547 | \n", "0.732054 | \n", "0.910649 | \n", "1.136028 | \n", "1.014856 | \n", "1.426770 | \n", "1.140227 | \n", "... | \n", "1.457503 | \n", "-0.19722 | \n", "-0.273387 | \n", "3.977884 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "-0.325822 | \n", "
CCN(CC)Cc1ccc2cc(COC(=O)Nc3ccc(C(=O)NO)cc3)ccc2c1.Cl.O | \n", "0.123124 | \n", "0.267321 | \n", "0.453349 | \n", "0.473807 | \n", "0.289093 | \n", "0.295629 | \n", "0.117262 | \n", "0.090182 | \n", "-0.130664 | \n", "0.137535 | \n", "... | \n", "-0.535847 | \n", "-0.19722 | \n", "-0.273387 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "-0.746358 | \n", "
Cl.Cl.Cn1cc(CNCC2CCN(c3ncc(C(=O)NO)cn3)CC2)c2ccccc21 | \n", "-0.028239 | \n", "-0.011372 | \n", "0.230844 | \n", "0.391784 | \n", "0.111804 | \n", "0.200441 | \n", "0.027626 | \n", "0.214944 | \n", "-0.017353 | \n", "0.364936 | \n", "... | \n", "-0.535847 | \n", "-0.19722 | \n", "-0.273387 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "-0.495217 | \n", "
CCS(=O)(=O)Nc1ccc2[nH]c(O)c(C(=Nc3ccc(CN4CCCCC4)cc3)c3ccccc3)c2c1 | \n", "1.115222 | \n", "0.770763 | \n", "0.847426 | \n", "0.860852 | \n", "0.884172 | \n", "0.997570 | \n", "1.217879 | \n", "0.869661 | \n", "1.210711 | \n", "0.882431 | \n", "... | \n", "-0.535847 | \n", "-0.19722 | \n", "3.638151 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "-1.140448 | \n", "
CN1CCC(c2c(O)cc(O)c3c(=O)cc(-c4ccccc4Cl)oc23)C(O)C1.Cl | \n", "0.252492 | \n", "-0.041707 | \n", "-0.056939 | \n", "0.099009 | \n", "-0.050176 | \n", "-0.053351 | \n", "-0.110797 | \n", "0.095547 | \n", "0.002473 | \n", "0.210616 | \n", "... | \n", "-0.535847 | \n", "-0.19722 | \n", "-0.273387 | \n", "-0.250038 | \n", "-0.073324 | \n", "-0.199302 | \n", "-0.19722 | \n", "-0.270527 | \n", "-0.284531 | \n", "0.466665 | \n", "
186 rows × 177 columns
\n", "