{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# 6 BASELINE SCIPLEX DATASET"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Requires**\n",
"sciplex_complete_middle_subset_lincs_genes.h5ad\n",
"\n",
"**Outputs**\n",
"adata_baseline_high_dose.h5ad\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import scanpy as sc\n",
"\n",
"from chemCPA.paths import DATA_DIR\n",
"\n",
"pd.set_option('display.max_columns', 200)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[PosixPath('/nfs/staff-ssd/hetzell/code/chemCPA_v2/project_folder/datasets/sciplex_complete_middle_subset_lincs_genes.h5ad'),\n",
" PosixPath('/nfs/staff-ssd/hetzell/code/chemCPA_v2/project_folder/datasets/sciplex_complete_middle_subset.h5ad'),\n",
" PosixPath('/nfs/staff-ssd/hetzell/code/chemCPA_v2/project_folder/datasets/adata_baseline.h5ad'),\n",
" PosixPath('/nfs/staff-ssd/hetzell/code/chemCPA_v2/project_folder/datasets/preds_scgen_A549.h5ad')]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list(DATA_DIR.iterdir())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Load the sciplex subset restricted to the LINCS genes\n",
"sciplex_path = DATA_DIR / \"sciplex_complete_middle_subset_lincs_genes.h5ad\"\n",
"adata_sciplex = sc.read(sciplex_path)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['cell_type', 'dose', 'dose_character', 'dose_pattern', 'g1s_score',\n",
" 'g2m_score', 'pathway', 'pathway_level_1', 'pathway_level_2',\n",
" 'product_dose', 'product_name', 'proliferation_index', 'replicate',\n",
" 'size_factor', 'target', 'vehicle', 'batch', 'n_counts', 'dose_val',\n",
" 'condition', 'drug_dose_name', 'cov_drug_dose_name', 'cov_drug',\n",
" 'control', 'split_ho_pathway', 'split_tyrosine_ood',\n",
" 'split_epigenetic_ood', 'split_cellcycle_ood', 'SMILES',\n",
" 'split_ood_finetuning', 'split_ho_epigenetic',\n",
" 'split_ho_epigenetic_all', 'split_random', 'split_ood_multi_task'],\n",
" dtype='object')"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"adata_sciplex.obs.columns"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Quisinostat', 'Hesperadin', 'Flavopiridol', 'Belinostat', 'Alvespimycin', 'TAK-901', 'Dacinostat', 'Tanespimycin', 'Givinostat']\n",
"Categories (188, object): ['2-Methoxyestradiol', 'JQ1', 'A-366', 'ABT-737', ..., 'YM155', 'ZM', 'Zileuton', 'control']"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Conditions whose split_ood_multi_task label is 'ood'\n",
"ood_mask = adata_sciplex.obs['split_ood_multi_task'] == 'ood'\n",
"adata_sciplex.obs.loc[ood_mask, 'condition'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 1000. 0. 100. 10000. 10.]\n"
]
}
],
"source": [
"# Subset to dose 0 and the highest dose (1e4)\n",
"\n",
"print(adata_sciplex.obs.dose.unique())\n",
"adata_sciplex = adata_sciplex[adata_sciplex.obs.dose.isin([0., 1e4])].copy()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"A549\n",
"MCF7\n",
"K562\n"
]
}
],
"source": [
"# Add one split column per cell_type (A549, MCF7, K562) on the dose-0 / dose-1e4 subset:\n",
"# cells labelled 'ood' stay 'ood' only for that cell type; 'ood' cells of the other cell types\n",
"# are re-labelled, half 'train' and half 'test'.\n",
"\n",
"SPLIT_SEED = 0  # fixed seed so the random train/test assignment is reproducible across runs\n",
"\n",
"for cell_type in adata_sciplex.obs.cell_type.unique():\n",
"    print(cell_type)\n",
"    split_key = f'split_baseline_{cell_type}'\n",
"    adata_sciplex.obs[split_key] = adata_sciplex.obs['split_ood_multi_task']\n",
"\n",
"    # 'ood' cells that belong to any *other* cell type are the ones to re-label\n",
"    is_ood = adata_sciplex.obs[split_key] == 'ood'\n",
"    is_other_cell_type = adata_sciplex.obs.cell_type != cell_type\n",
"    sub_df = adata_sciplex.obs.loc[is_ood & is_other_cell_type]\n",
"\n",
"    train_test = sub_df.index\n",
"    test = sub_df.sample(frac=0.5, random_state=SPLIT_SEED).index\n",
"\n",
"    # label all of them 'train' first, then overwrite the sampled half with 'test'\n",
"    adata_sciplex.obs.loc[train_test, split_key] = 'train'\n",
"    adata_sciplex.obs.loc[test, split_key] = 'test'"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"train 107544\n",
"test 12008\n",
"ood 775\n",
"Name: split_baseline_A549, dtype: int64"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"adata_sciplex.obs['split_baseline_A549'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" condition | \n",
" 2-Methoxyestradiol | \n",
" JQ1 | \n",
" A-366 | \n",
" ABT-737 | \n",
" AC480 | \n",
" AG-490 | \n",
" AG-14361 | \n",
" AICAR | \n",
" AMG-900 | \n",
" AR-42 | \n",
" AT9283 | \n",
" AZ | \n",
" AZD1480 | \n",
" Abexinostat | \n",
" Alendronate | \n",
" Alisertib | \n",
" Altretamine | \n",
" Alvespimycin | \n",
" Aminoglutethimide | \n",
" Amisulpride | \n",
" Anacardic | \n",
" Andarine | \n",
" Aurora | \n",
" Avagacestat | \n",
" Azacitidine | \n",
" BMS-265246 | \n",
" BMS-536924 | \n",
" BMS-754807 | \n",
" BMS-911543 | \n",
" BRD4770 | \n",
" Barasertib | \n",
" Baricitinib | \n",
" Belinostat | \n",
" Bisindolylmaleimide | \n",
" Bosutinib | \n",
" Busulfan | \n",
" CEP-33779 | \n",
" CUDC-101 | \n",
" CUDC-907 | \n",
" CYC116 | \n",
" Capecitabine | \n",
" Carmofur | \n",
" Cediranib | \n",
" Celecoxib | \n",
" Cerdulatinib | \n",
" Cimetidine | \n",
" Clevudine | \n",
" Costunolide | \n",
" Crizotinib | \n",
" Curcumin | \n",
" Cyclocytidine | \n",
" Dacinostat | \n",
" Danusertib | \n",
" Daphnetin | \n",
" Dasatinib | \n",
" Decitabine | \n",
" Disulfiram | \n",
" Divalproex | \n",
" Droxinostat | \n",
" EED226 | \n",
" ENMD-2076 | \n",
" Ellagic | \n",
" Entacapone | \n",
" Entinostat | \n",
" Enzastaurin | \n",
" Epothilone | \n",
" FLLL32 | \n",
" Fasudil | \n",
" Fedratinib | \n",
" Filgotinib | \n",
" Flavopiridol | \n",
" Fluorouracil | \n",
" Fulvestrant | \n",
" G007-LK | \n",
" GSK | \n",
" GSK1070916 | \n",
" GSK-LSD1 | \n",
" Gandotinib | \n",
" Givinostat | \n",
" Glesatinib?(MGCD265) | \n",
" Hesperadin | \n",
" INO-1001 | \n",
" IOX2 | \n",
" ITSA-1 | \n",
" Iniparib | \n",
" Ivosidenib | \n",
" JNJ-7706621 | \n",
" JNJ-26854165 | \n",
" KW-2449 | \n",
" Ki8751 | \n",
" Ki16425 | \n",
" Lapatinib | \n",
" Lenalidomide | \n",
" Linifanib | \n",
" Lomustine | \n",
" Luminespib | \n",
" M344 | \n",
" MC1568 | \n",
" MK-0752 | \n",
" MK-5108 | \n",
" MLN8054 | \n",
" Maraviroc | \n",
" Meprednisone | \n",
" Mercaptopurine | \n",
" Mesna | \n",
" Mocetinostat | \n",
" Momelotinib | \n",
" Motesanib | \n",
" NVP-BSK805 | \n",
" Navitoclax | \n",
" Nilotinib | \n",
" Nintedanib | \n",
" Obatoclax | \n",
" Ofloxacin | \n",
" PCI-34051 | \n",
" PD98059 | \n",
" PD173074 | \n",
" PF-3845 | \n",
" PF-573228 | \n",
" PFI-1 | \n",
" PHA-680632 | \n",
" PJ34 | \n",
" Panobinostat | \n",
" Patupilone | \n",
" Pelitinib | \n",
" Pirarubicin | \n",
" Pracinostat | \n",
" Prednisone | \n",
" Quercetin | \n",
" Quisinostat | \n",
" RG108 | \n",
" Raltitrexed | \n",
" Ramelteon | \n",
" Regorafenib | \n",
" Resminostat | \n",
" Resveratrol | \n",
" Rigosertib | \n",
" Roscovitine | \n",
" Roxadustat | \n",
" Rucaparib | \n",
" Ruxolitinib | \n",
" S3I-201 | \n",
" S-Ruxolitinib | \n",
" SB431542 | \n",
" SGI-1776 | \n",
" SL-327 | \n",
" SNS-314 | \n",
" SRT1720 | \n",
" SRT2104 | \n",
" SRT3025 | \n",
" Selisistat | \n",
" Sirtinol | \n",
" Sodium | \n",
" Sorafenib | \n",
" Streptozotocin | \n",
" TAK-901 | \n",
" TG101209 | \n",
" TGX-221 | \n",
" TMP195 | \n",
" Tacedinaline | \n",
" Tanespimycin | \n",
" Tazemetostat | \n",
" Temsirolimus | \n",
" Thalidomide | \n",
" Thiotepa | \n",
" Tie2 | \n",
" Tofacitinib | \n",
" Toremifene | \n",
" Tozasertib | \n",
" Trametinib | \n",
" Tranylcypromine | \n",
" Triamcinolone | \n",
" Trichostatin | \n",
" Tubastatin | \n",
" Tucidinostat | \n",
" UNC0379 | \n",
" UNC0631 | \n",
" UNC1999 | \n",
" Valproic | \n",
" Vandetanib | \n",
" Veliparib | \n",
" WHI-P154 | \n",
" WP1066 | \n",
" XAV-939 | \n",
" YM155 | \n",
" ZM | \n",
" Zileuton | \n",
" control | \n",
"
\n",
" \n",
" split_ood_multi_task | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" ood | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 175 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 536 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 517 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 123 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 530 | \n",
" 0 | \n",
" 372 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 525 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 195 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 320 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
"
\n",
" \n",
" test | \n",
" 18 | \n",
" 298 | \n",
" 35 | \n",
" 27 | \n",
" 24 | \n",
" 31 | \n",
" 25 | \n",
" 35 | \n",
" 20 | \n",
" 221 | \n",
" 17 | \n",
" 14 | \n",
" 9 | \n",
" 277 | \n",
" 27 | \n",
" 139 | \n",
" 29 | \n",
" 0 | \n",
" 35 | \n",
" 23 | \n",
" 26 | \n",
" 34 | \n",
" 22 | \n",
" 32 | \n",
" 24 | \n",
" 20 | \n",
" 25 | \n",
" 162 | \n",
" 26 | \n",
" 28 | \n",
" 217 | \n",
" 26 | \n",
" 0 | \n",
" 1 | \n",
" 9 | \n",
" 23 | \n",
" 30 | \n",
" 150 | \n",
" 102 | \n",
" 25 | \n",
" 25 | \n",
" 24 | \n",
" 18 | \n",
" 23 | \n",
" 26 | \n",
" 23 | \n",
" 24 | \n",
" 32 | \n",
" 170 | \n",
" 30 | \n",
" 17 | \n",
" 0 | \n",
" 19 | \n",
" 25 | \n",
" 165 | \n",
" 30 | \n",
" 29 | \n",
" 33 | \n",
" 23 | \n",
" 39 | \n",
" 324 | \n",
" 34 | \n",
" 22 | \n",
" 249 | \n",
" 26 | \n",
" 125 | \n",
" 14 | \n",
" 25 | \n",
" 10 | \n",
" 16 | \n",
" 0 | \n",
" 31 | \n",
" 306 | \n",
" 23 | \n",
" 11 | \n",
" 195 | \n",
" 41 | \n",
" 20 | \n",
" 0 | \n",
" 26 | \n",
" 0 | \n",
" 23 | \n",
" 27 | \n",
" 22 | \n",
" 15 | \n",
" 15 | \n",
" 19 | \n",
" 25 | \n",
" 176 | \n",
" 23 | \n",
" 29 | \n",
" 17 | \n",
" 26 | \n",
" 23 | \n",
" 19 | \n",
" 144 | \n",
" 209 | \n",
" 30 | \n",
" 26 | \n",
" 14 | \n",
" 20 | \n",
" 25 | \n",
" 33 | \n",
" 16 | \n",
" 38 | \n",
" 161 | \n",
" 11 | \n",
" 22 | \n",
" 23 | \n",
" 12 | \n",
" 21 | \n",
" 153 | \n",
" 14 | \n",
" 30 | \n",
" 25 | \n",
" 27 | \n",
" 26 | \n",
" 25 | \n",
" 24 | \n",
" 28 | \n",
" 10 | \n",
" 30 | \n",
" 256 | \n",
" 149 | \n",
" 17 | \n",
" 104 | \n",
" 233 | \n",
" 28 | \n",
" 19 | \n",
" 0 | \n",
" 30 | \n",
" 181 | \n",
" 27 | \n",
" 16 | \n",
" 221 | \n",
" 25 | \n",
" 51 | \n",
" 31 | \n",
" 36 | \n",
" 26 | \n",
" 29 | \n",
" 34 | \n",
" 26 | \n",
" 32 | \n",
" 14 | \n",
" 33 | \n",
" 14 | \n",
" 34 | \n",
" 52 | \n",
" 25 | \n",
" 29 | \n",
" 32 | \n",
" 26 | \n",
" 15 | \n",
" 27 | \n",
" 0 | \n",
" 14 | \n",
" 21 | \n",
" 23 | \n",
" 305 | \n",
" 0 | \n",
" 40 | \n",
" 15 | \n",
" 27 | \n",
" 30 | \n",
" 20 | \n",
" 24 | \n",
" 13 | \n",
" 19 | \n",
" 139 | \n",
" 30 | \n",
" 37 | \n",
" 212 | \n",
" 22 | \n",
" 244 | \n",
" 19 | \n",
" 32 | \n",
" 26 | \n",
" 30 | \n",
" 16 | \n",
" 24 | \n",
" 30 | \n",
" 18 | \n",
" 10 | \n",
" 45 | \n",
" 16 | \n",
" 29 | \n",
" 1132 | \n",
"
\n",
" \n",
" train | \n",
" 385 | \n",
" 362 | \n",
" 686 | \n",
" 528 | \n",
" 538 | \n",
" 725 | \n",
" 620 | \n",
" 690 | \n",
" 572 | \n",
" 281 | \n",
" 363 | \n",
" 392 | \n",
" 322 | \n",
" 306 | \n",
" 565 | \n",
" 211 | \n",
" 618 | \n",
" 0 | \n",
" 762 | \n",
" 732 | \n",
" 716 | \n",
" 802 | \n",
" 555 | \n",
" 693 | \n",
" 602 | \n",
" 493 | \n",
" 499 | \n",
" 224 | \n",
" 631 | \n",
" 689 | \n",
" 281 | \n",
" 802 | \n",
" 0 | \n",
" 52 | \n",
" 365 | \n",
" 753 | \n",
" 735 | \n",
" 350 | \n",
" 210 | \n",
" 419 | \n",
" 799 | \n",
" 628 | \n",
" 431 | \n",
" 724 | \n",
" 582 | \n",
" 450 | \n",
" 704 | \n",
" 713 | \n",
" 224 | \n",
" 744 | \n",
" 464 | \n",
" 0 | \n",
" 380 | \n",
" 653 | \n",
" 237 | \n",
" 545 | \n",
" 688 | \n",
" 687 | \n",
" 664 | \n",
" 713 | \n",
" 415 | \n",
" 724 | \n",
" 716 | \n",
" 330 | \n",
" 564 | \n",
" 182 | \n",
" 492 | \n",
" 796 | \n",
" 257 | \n",
" 675 | \n",
" 0 | \n",
" 597 | \n",
" 405 | \n",
" 557 | \n",
" 692 | \n",
" 227 | \n",
" 749 | \n",
" 457 | \n",
" 0 | \n",
" 412 | \n",
" 0 | \n",
" 731 | \n",
" 749 | \n",
" 542 | \n",
" 412 | \n",
" 325 | \n",
" 466 | \n",
" 573 | \n",
" 245 | \n",
" 563 | \n",
" 719 | \n",
" 390 | \n",
" 655 | \n",
" 584 | \n",
" 529 | \n",
" 200 | \n",
" 306 | \n",
" 722 | \n",
" 594 | \n",
" 397 | \n",
" 555 | \n",
" 669 | \n",
" 781 | \n",
" 379 | \n",
" 713 | \n",
" 200 | \n",
" 403 | \n",
" 692 | \n",
" 625 | \n",
" 320 | \n",
" 594 | \n",
" 211 | \n",
" 329 | \n",
" 738 | \n",
" 517 | \n",
" 767 | \n",
" 611 | \n",
" 527 | \n",
" 398 | \n",
" 609 | \n",
" 320 | \n",
" 593 | \n",
" 336 | \n",
" 185 | \n",
" 365 | \n",
" 183 | \n",
" 326 | \n",
" 673 | \n",
" 588 | \n",
" 0 | \n",
" 752 | \n",
" 397 | \n",
" 672 | \n",
" 225 | \n",
" 335 | \n",
" 670 | \n",
" 65 | \n",
" 657 | \n",
" 726 | \n",
" 512 | \n",
" 789 | \n",
" 740 | \n",
" 674 | \n",
" 746 | \n",
" 380 | \n",
" 578 | \n",
" 451 | \n",
" 530 | \n",
" 748 | \n",
" 618 | \n",
" 756 | \n",
" 728 | \n",
" 674 | \n",
" 372 | \n",
" 735 | \n",
" 0 | \n",
" 300 | \n",
" 621 | \n",
" 629 | \n",
" 389 | \n",
" 0 | \n",
" 759 | \n",
" 376 | \n",
" 668 | \n",
" 660 | \n",
" 707 | \n",
" 726 | \n",
" 362 | \n",
" 440 | \n",
" 328 | \n",
" 768 | \n",
" 767 | \n",
" 280 | \n",
" 738 | \n",
" 265 | \n",
" 420 | \n",
" 704 | \n",
" 674 | \n",
" 704 | \n",
" 318 | \n",
" 605 | \n",
" 630 | \n",
" 710 | \n",
" 397 | \n",
" 51 | \n",
" 502 | \n",
" 670 | \n",
" 11872 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"condition 2-Methoxyestradiol JQ1 A-366 ABT-737 AC480 AG-490 \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 0 \n",
"test 18 298 35 27 24 31 \n",
"train 385 362 686 528 538 725 \n",
"\n",
"condition AG-14361 AICAR AMG-900 AR-42 AT9283 AZ AZD1480 \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 0 0 \n",
"test 25 35 20 221 17 14 9 \n",
"train 620 690 572 281 363 392 322 \n",
"\n",
"condition Abexinostat Alendronate Alisertib Altretamine \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 \n",
"test 277 27 139 29 \n",
"train 306 565 211 618 \n",
"\n",
"condition Alvespimycin Aminoglutethimide Amisulpride Anacardic \\\n",
"split_ood_multi_task \n",
"ood 175 0 0 0 \n",
"test 0 35 23 26 \n",
"train 0 762 732 716 \n",
"\n",
"condition Andarine Aurora Avagacestat Azacitidine BMS-265246 \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 \n",
"test 34 22 32 24 20 \n",
"train 802 555 693 602 493 \n",
"\n",
"condition BMS-536924 BMS-754807 BMS-911543 BRD4770 Barasertib \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 \n",
"test 25 162 26 28 217 \n",
"train 499 224 631 689 281 \n",
"\n",
"condition Baricitinib Belinostat Bisindolylmaleimide Bosutinib \\\n",
"split_ood_multi_task \n",
"ood 0 536 0 0 \n",
"test 26 0 1 9 \n",
"train 802 0 52 365 \n",
"\n",
"condition Busulfan CEP-33779 CUDC-101 CUDC-907 CYC116 \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 \n",
"test 23 30 150 102 25 \n",
"train 753 735 350 210 419 \n",
"\n",
"condition Capecitabine Carmofur Cediranib Celecoxib \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 \n",
"test 25 24 18 23 \n",
"train 799 628 431 724 \n",
"\n",
"condition Cerdulatinib Cimetidine Clevudine Costunolide \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 \n",
"test 26 23 24 32 \n",
"train 582 450 704 713 \n",
"\n",
"condition Crizotinib Curcumin Cyclocytidine Dacinostat \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 517 \n",
"test 170 30 17 0 \n",
"train 224 744 464 0 \n",
"\n",
"condition Danusertib Daphnetin Dasatinib Decitabine \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 \n",
"test 19 25 165 30 \n",
"train 380 653 237 545 \n",
"\n",
"condition Disulfiram Divalproex Droxinostat EED226 ENMD-2076 \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 \n",
"test 29 33 23 39 324 \n",
"train 688 687 664 713 415 \n",
"\n",
"condition Ellagic Entacapone Entinostat Enzastaurin \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 \n",
"test 34 22 249 26 \n",
"train 724 716 330 564 \n",
"\n",
"condition Epothilone FLLL32 Fasudil Fedratinib Filgotinib \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 \n",
"test 125 14 25 10 16 \n",
"train 182 492 796 257 675 \n",
"\n",
"condition Flavopiridol Fluorouracil Fulvestrant G007-LK GSK \\\n",
"split_ood_multi_task \n",
"ood 123 0 0 0 0 \n",
"test 0 31 306 23 11 \n",
"train 0 597 405 557 692 \n",
"\n",
"condition GSK1070916 GSK-LSD1 Gandotinib Givinostat \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 530 \n",
"test 195 41 20 0 \n",
"train 227 749 457 0 \n",
"\n",
"condition Glesatinib?(MGCD265) Hesperadin INO-1001 IOX2 \\\n",
"split_ood_multi_task \n",
"ood 0 372 0 0 \n",
"test 26 0 23 27 \n",
"train 412 0 731 749 \n",
"\n",
"condition ITSA-1 Iniparib Ivosidenib JNJ-7706621 JNJ-26854165 \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 \n",
"test 22 15 15 19 25 \n",
"train 542 412 325 466 573 \n",
"\n",
"condition KW-2449 Ki8751 Ki16425 Lapatinib Lenalidomide \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 \n",
"test 176 23 29 17 26 \n",
"train 245 563 719 390 655 \n",
"\n",
"condition Linifanib Lomustine Luminespib M344 MC1568 MK-0752 \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 0 \n",
"test 23 19 144 209 30 26 \n",
"train 584 529 200 306 722 594 \n",
"\n",
"condition MK-5108 MLN8054 Maraviroc Meprednisone \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 \n",
"test 14 20 25 33 \n",
"train 397 555 669 781 \n",
"\n",
"condition Mercaptopurine Mesna Mocetinostat Momelotinib \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 \n",
"test 16 38 161 11 \n",
"train 379 713 200 403 \n",
"\n",
"condition Motesanib NVP-BSK805 Navitoclax Nilotinib \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 \n",
"test 22 23 12 21 \n",
"train 692 625 320 594 \n",
"\n",
"condition Nintedanib Obatoclax Ofloxacin PCI-34051 PD98059 \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 \n",
"test 153 14 30 25 27 \n",
"train 211 329 738 517 767 \n",
"\n",
"condition PD173074 PF-3845 PF-573228 PFI-1 PHA-680632 PJ34 \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 0 \n",
"test 26 25 24 28 10 30 \n",
"train 611 527 398 609 320 593 \n",
"\n",
"condition Panobinostat Patupilone Pelitinib Pirarubicin \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 \n",
"test 256 149 17 104 \n",
"train 336 185 365 183 \n",
"\n",
"condition Pracinostat Prednisone Quercetin Quisinostat RG108 \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 525 0 \n",
"test 233 28 19 0 30 \n",
"train 326 673 588 0 752 \n",
"\n",
"condition Raltitrexed Ramelteon Regorafenib Resminostat \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 \n",
"test 181 27 16 221 \n",
"train 397 672 225 335 \n",
"\n",
"condition Resveratrol Rigosertib Roscovitine Roxadustat \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 \n",
"test 25 51 31 36 \n",
"train 670 65 657 726 \n",
"\n",
"condition Rucaparib Ruxolitinib S3I-201 S-Ruxolitinib \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 \n",
"test 26 29 34 26 \n",
"train 512 789 740 674 \n",
"\n",
"condition SB431542 SGI-1776 SL-327 SNS-314 SRT1720 SRT2104 \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 0 \n",
"test 32 14 33 14 34 52 \n",
"train 746 380 578 451 530 748 \n",
"\n",
"condition SRT3025 Selisistat Sirtinol Sodium Sorafenib \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 \n",
"test 25 29 32 26 15 \n",
"train 618 756 728 674 372 \n",
"\n",
"condition Streptozotocin TAK-901 TG101209 TGX-221 TMP195 \\\n",
"split_ood_multi_task \n",
"ood 0 195 0 0 0 \n",
"test 27 0 14 21 23 \n",
"train 735 0 300 621 629 \n",
"\n",
"condition Tacedinaline Tanespimycin Tazemetostat Temsirolimus \\\n",
"split_ood_multi_task \n",
"ood 0 320 0 0 \n",
"test 305 0 40 15 \n",
"train 389 0 759 376 \n",
"\n",
"condition Thalidomide Thiotepa Tie2 Tofacitinib Toremifene \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 \n",
"test 27 30 20 24 13 \n",
"train 668 660 707 726 362 \n",
"\n",
"condition Tozasertib Trametinib Tranylcypromine Triamcinolone \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 \n",
"test 19 139 30 37 \n",
"train 440 328 768 767 \n",
"\n",
"condition Trichostatin Tubastatin Tucidinostat UNC0379 \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 \n",
"test 212 22 244 19 \n",
"train 280 738 265 420 \n",
"\n",
"condition UNC0631 UNC1999 Valproic Vandetanib Veliparib \\\n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 \n",
"test 32 26 30 16 24 \n",
"train 704 674 704 318 605 \n",
"\n",
"condition WHI-P154 WP1066 XAV-939 YM155 ZM Zileuton control \n",
"split_ood_multi_task \n",
"ood 0 0 0 0 0 0 0 \n",
"test 30 18 10 45 16 29 1132 \n",
"train 630 710 397 51 502 670 11872 "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.crosstab(adata_sciplex.obs['split_ood_multi_task'], adata_sciplex.obs['condition'])"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" cell_type | \n",
" A549 | \n",
" K562 | \n",
" MCF7 | \n",
"
\n",
" \n",
" split_baseline_K562 | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" ood | \n",
" 0 | \n",
" 543 | \n",
" 0 | \n",
"
\n",
" \n",
" test | \n",
" 3108 | \n",
" 2386 | \n",
" 6630 | \n",
"
\n",
" \n",
" train | \n",
" 26941 | \n",
" 26527 | \n",
" 54192 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
"cell_type A549 K562 MCF7\n",
"split_baseline_K562 \n",
"ood 0 543 0\n",
"test 3108 2386 6630\n",
"train 26941 26527 54192"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: in split_baseline_<cell_type>, 'ood' cells must occur only in that cell type\n",
"\n",
"cell_type = 'K562'\n",
"\n",
"split_by_cell_type = pd.crosstab(adata_sciplex.obs[f'split_baseline_{cell_type}'], adata_sciplex.obs['cell_type'])\n",
"\n",
"# every other cell type must have zero 'ood' cells after the re-labelling\n",
"ood_counts = split_by_cell_type.loc['ood']\n",
"assert ood_counts.drop(cell_type).eq(0).all(), f'ood cells found in other cell types: {ood_counts.to_dict()}'\n",
"\n",
"split_by_cell_type"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Save the re-split AnnData to DATA_DIR, gzip-compressed\n",
"\n",
"output_path = DATA_DIR / 'adata_baseline_high_dose.h5ad'\n",
"adata_sciplex.write(output_path, compression=\"gzip\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7.12 ('chemical_CPA')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "45879ff77d613949b37d9f94260a6a718c11df1c0993b072c2b5b60153db7170"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}