{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 6 BASELINE SCIPLEX DATASET" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "**Requires**\n", "sciplex_complete_middle_subset_lincs_genes.h5ad\n", "\n", "**Outputs**\n", "adata_baseline_high_dose.h5ad\n", "\n" ] },
{ "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import scanpy as sc\n", "\n", "from chemCPA.paths import DATA_DIR\n", "\n", "# Show up to 200 columns when displaying wide DataFrames\n", "pd.options.display.max_columns = 200" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[PosixPath('/nfs/staff-ssd/hetzell/code/chemCPA_v2/project_folder/datasets/sciplex_complete_middle_subset_lincs_genes.h5ad'),\n", " PosixPath('/nfs/staff-ssd/hetzell/code/chemCPA_v2/project_folder/datasets/sciplex_complete_middle_subset.h5ad'),\n", " PosixPath('/nfs/staff-ssd/hetzell/code/chemCPA_v2/project_folder/datasets/adata_baseline.h5ad'),\n", " PosixPath('/nfs/staff-ssd/hetzell/code/chemCPA_v2/project_folder/datasets/preds_scgen_A549.h5ad')]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "list(DATA_DIR.iterdir())" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Read the input dataset listed under **Requires** from DATA_DIR\n", "sciplex_path = DATA_DIR / \"sciplex_complete_middle_subset_lincs_genes.h5ad\"\n", "adata_sciplex = sc.read(sciplex_path)" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['cell_type', 'dose', 'dose_character', 'dose_pattern', 'g1s_score',\n", " 'g2m_score', 'pathway', 'pathway_level_1', 'pathway_level_2',\n", " 'product_dose', 'product_name', 'proliferation_index', 'replicate',\n", " 'size_factor', 'target', 'vehicle', 'batch', 'n_counts', 'dose_val',\n", " 'condition', 'drug_dose_name', 'cov_drug_dose_name', 'cov_drug',\n", " 'control', 'split_ho_pathway', 'split_tyrosine_ood',\n", " 'split_epigenetic_ood', 'split_cellcycle_ood', 'SMILES',\n", " 
'split_ood_finetuning', 'split_ho_epigenetic',\n", " 'split_ho_epigenetic_all', 'split_random', 'split_ood_multi_task'],\n", " dtype='object')" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adata_sciplex.obs.columns" ] },
{ "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['Quisinostat', 'Hesperadin', 'Flavopiridol', 'Belinostat', 'Alvespimycin', 'TAK-901', 'Dacinostat', 'Tanespimycin', 'Givinostat']\n", "Categories (188, object): ['2-Methoxyestradiol', 'JQ1', 'A-366', 'ABT-737', ..., 'YM155', 'ZM', 'Zileuton', 'control']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adata_sciplex.obs.loc[adata_sciplex.obs.split_ood_multi_task == 'ood', 'condition'].unique()" ] },
{ "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 1000. 0. 100. 10000. 10.]\n" ] } ], "source": [ "# Subset to the largest dose (1e4) and keep the dose-0 cells (presumably the controls)\n", "\n", "print(adata_sciplex.obs.dose.unique())\n", "adata_sciplex = adata_sciplex[adata_sciplex.obs.dose.isin([0., 1e4])].copy()" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "A549\n", "MCF7\n", "K562\n" ] } ], "source": [
"# Add one split column per cell type (A549, MCF7, K562) on the dose=1e4 subset selected above.\n",
"# The ood drugs stay 'ood' only in the held-out cell type; their cells from the other\n",
"# cell types are divided at random, half 'train' and half 'test'.\n",
"\n",
"SPLIT_SEED = 42  # fixed seed so that re-running the notebook reproduces the same split\n",
"\n",
"for cell_type in adata_sciplex.obs.cell_type.unique():\n",
"    print(cell_type)\n",
"    split_key = f'split_baseline_{cell_type}'\n",
"    adata_sciplex.obs[split_key] = adata_sciplex.obs['split_ood_multi_task']\n",
"\n",
"    # ood cells that do not belong to the held-out cell type\n",
"    is_ood = adata_sciplex.obs[split_key] == 'ood'\n",
"    is_other_cell_type = adata_sciplex.obs.cell_type != cell_type\n",
"    sub_df = adata_sciplex.obs.loc[is_ood & is_other_cell_type]\n",
"\n",
"    train_test = sub_df.index\n",
"    test = sub_df.sample(frac=0.5, random_state=SPLIT_SEED).index\n",
"\n",
"    # label all of them 'train' first, then overwrite the sampled half with 'test'\n",
"    adata_sciplex.obs.loc[train_test, split_key] = 'train'\n",
"    adata_sciplex.obs.loc[test, split_key] = 'test'" ] },
{ "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "train 107544\n", "test 12008\n", "ood 775\n", "Name: split_baseline_A549, dtype: int64" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "adata_sciplex.obs['split_baseline_A549'].value_counts()" ] },
{ "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
condition2-MethoxyestradiolJQ1A-366ABT-737AC480AG-490AG-14361AICARAMG-900AR-42AT9283AZAZD1480AbexinostatAlendronateAlisertibAltretamineAlvespimycinAminoglutethimideAmisulprideAnacardicAndarineAuroraAvagacestatAzacitidineBMS-265246BMS-536924BMS-754807BMS-911543BRD4770BarasertibBaricitinibBelinostatBisindolylmaleimideBosutinibBusulfanCEP-33779CUDC-101CUDC-907CYC116CapecitabineCarmofurCediranibCelecoxibCerdulatinibCimetidineClevudineCostunolideCrizotinibCurcuminCyclocytidineDacinostatDanusertibDaphnetinDasatinibDecitabineDisulfiramDivalproexDroxinostatEED226ENMD-2076EllagicEntacaponeEntinostatEnzastaurinEpothiloneFLLL32FasudilFedratinibFilgotinibFlavopiridolFluorouracilFulvestrantG007-LKGSKGSK1070916GSK-LSD1GandotinibGivinostatGlesatinib?(MGCD265)HesperadinINO-1001IOX2ITSA-1IniparibIvosidenibJNJ-7706621JNJ-26854165KW-2449Ki8751Ki16425LapatinibLenalidomideLinifanibLomustineLuminespibM344MC1568MK-0752MK-5108MLN8054MaravirocMeprednisoneMercaptopurineMesnaMocetinostatMomelotinibMotesanibNVP-BSK805NavitoclaxNilotinibNintedanibObatoclaxOfloxacinPCI-34051PD98059PD173074PF-3845PF-573228PFI-1PHA-680632PJ34PanobinostatPatupilonePelitinibPirarubicinPracinostatPrednisoneQuercetinQuisinostatRG108RaltitrexedRamelteonRegorafenibResminostatResveratrolRigosertibRoscovitineRoxadustatRucaparibRuxolitinibS3I-201S-RuxolitinibSB431542SGI-1776SL-327SNS-314SRT1720SRT2104SRT3025SelisistatSirtinolSodiumSorafenibStreptozotocinTAK-901TG101209TGX-221TMP195TacedinalineTanespimycinTazemetostatTemsirolimusThalidomideThiotepaTie2TofacitinibToremifeneTozasertibTrametinibTranylcypromineTriamcinoloneTrichostatinTubastatinTucidinostatUNC0379UNC0631UNC1999ValproicVandetanibVeliparibWHI-P154WP1066XAV-939YM155ZMZileutoncontrol
split_ood_multi_task
ood00000000000000000175000000000000005360000000000000000005170000000000000000001230000000530037200000000000000000000000000000000000000000000000052500000000000000000000000001950000320000000000000000000000000000
test182983527243125352022117149277271392903523263422322420251622628217260192330150102252524182326232432170301701925165302933233932434222492612514251016031306231119541200260232722151519251762329172623191442093026142025331638161112223122115314302527262524281030256149171042332819030181271622125513136262934263214331434522529322615270142123305040152730202413191393037212222441932263016243018104516291132
train38536268652853872562069057228136339232230656521161807627327168025556936024934992246316892818020523657537353502104197996284317245824507047132247444640380653237545688687664713415724716330564182492796257675059740555769222774945704120731749542412325466573245563719390655584529200306722594397555669781379713200403692625320594211329738517767611527398609320593336185365183326673588075239767222533567065657726512789740674746380578451530748618756728674372735030062162938907593766686607077263624403287687672807382654207046747043186056307103975150267011872
\n", "
" ], "text/plain": [ "condition 2-Methoxyestradiol JQ1 A-366 ABT-737 AC480 AG-490 \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 0 \n", "test 18 298 35 27 24 31 \n", "train 385 362 686 528 538 725 \n", "\n", "condition AG-14361 AICAR AMG-900 AR-42 AT9283 AZ AZD1480 \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 0 0 \n", "test 25 35 20 221 17 14 9 \n", "train 620 690 572 281 363 392 322 \n", "\n", "condition Abexinostat Alendronate Alisertib Altretamine \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 \n", "test 277 27 139 29 \n", "train 306 565 211 618 \n", "\n", "condition Alvespimycin Aminoglutethimide Amisulpride Anacardic \\\n", "split_ood_multi_task \n", "ood 175 0 0 0 \n", "test 0 35 23 26 \n", "train 0 762 732 716 \n", "\n", "condition Andarine Aurora Avagacestat Azacitidine BMS-265246 \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 \n", "test 34 22 32 24 20 \n", "train 802 555 693 602 493 \n", "\n", "condition BMS-536924 BMS-754807 BMS-911543 BRD4770 Barasertib \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 \n", "test 25 162 26 28 217 \n", "train 499 224 631 689 281 \n", "\n", "condition Baricitinib Belinostat Bisindolylmaleimide Bosutinib \\\n", "split_ood_multi_task \n", "ood 0 536 0 0 \n", "test 26 0 1 9 \n", "train 802 0 52 365 \n", "\n", "condition Busulfan CEP-33779 CUDC-101 CUDC-907 CYC116 \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 \n", "test 23 30 150 102 25 \n", "train 753 735 350 210 419 \n", "\n", "condition Capecitabine Carmofur Cediranib Celecoxib \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 \n", "test 25 24 18 23 \n", "train 799 628 431 724 \n", "\n", "condition Cerdulatinib Cimetidine Clevudine Costunolide \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 \n", "test 26 23 24 32 \n", "train 582 450 704 713 \n", "\n", "condition Crizotinib Curcumin Cyclocytidine Dacinostat \\\n", "split_ood_multi_task \n", "ood 0 0 0 517 \n", "test 170 30 17 0 \n", "train 224 744 464 0 \n", "\n", "condition Danusertib Daphnetin Dasatinib Decitabine 
\\\n", "split_ood_multi_task \n", "ood 0 0 0 0 \n", "test 19 25 165 30 \n", "train 380 653 237 545 \n", "\n", "condition Disulfiram Divalproex Droxinostat EED226 ENMD-2076 \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 \n", "test 29 33 23 39 324 \n", "train 688 687 664 713 415 \n", "\n", "condition Ellagic Entacapone Entinostat Enzastaurin \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 \n", "test 34 22 249 26 \n", "train 724 716 330 564 \n", "\n", "condition Epothilone FLLL32 Fasudil Fedratinib Filgotinib \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 \n", "test 125 14 25 10 16 \n", "train 182 492 796 257 675 \n", "\n", "condition Flavopiridol Fluorouracil Fulvestrant G007-LK GSK \\\n", "split_ood_multi_task \n", "ood 123 0 0 0 0 \n", "test 0 31 306 23 11 \n", "train 0 597 405 557 692 \n", "\n", "condition GSK1070916 GSK-LSD1 Gandotinib Givinostat \\\n", "split_ood_multi_task \n", "ood 0 0 0 530 \n", "test 195 41 20 0 \n", "train 227 749 457 0 \n", "\n", "condition Glesatinib?(MGCD265) Hesperadin INO-1001 IOX2 \\\n", "split_ood_multi_task \n", "ood 0 372 0 0 \n", "test 26 0 23 27 \n", "train 412 0 731 749 \n", "\n", "condition ITSA-1 Iniparib Ivosidenib JNJ-7706621 JNJ-26854165 \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 \n", "test 22 15 15 19 25 \n", "train 542 412 325 466 573 \n", "\n", "condition KW-2449 Ki8751 Ki16425 Lapatinib Lenalidomide \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 \n", "test 176 23 29 17 26 \n", "train 245 563 719 390 655 \n", "\n", "condition Linifanib Lomustine Luminespib M344 MC1568 MK-0752 \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 0 \n", "test 23 19 144 209 30 26 \n", "train 584 529 200 306 722 594 \n", "\n", "condition MK-5108 MLN8054 Maraviroc Meprednisone \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 \n", "test 14 20 25 33 \n", "train 397 555 669 781 \n", "\n", "condition Mercaptopurine Mesna Mocetinostat Momelotinib \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 \n", "test 16 38 161 11 \n", "train 379 713 200 
403 \n", "\n", "condition Motesanib NVP-BSK805 Navitoclax Nilotinib \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 \n", "test 22 23 12 21 \n", "train 692 625 320 594 \n", "\n", "condition Nintedanib Obatoclax Ofloxacin PCI-34051 PD98059 \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 \n", "test 153 14 30 25 27 \n", "train 211 329 738 517 767 \n", "\n", "condition PD173074 PF-3845 PF-573228 PFI-1 PHA-680632 PJ34 \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 0 \n", "test 26 25 24 28 10 30 \n", "train 611 527 398 609 320 593 \n", "\n", "condition Panobinostat Patupilone Pelitinib Pirarubicin \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 \n", "test 256 149 17 104 \n", "train 336 185 365 183 \n", "\n", "condition Pracinostat Prednisone Quercetin Quisinostat RG108 \\\n", "split_ood_multi_task \n", "ood 0 0 0 525 0 \n", "test 233 28 19 0 30 \n", "train 326 673 588 0 752 \n", "\n", "condition Raltitrexed Ramelteon Regorafenib Resminostat \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 \n", "test 181 27 16 221 \n", "train 397 672 225 335 \n", "\n", "condition Resveratrol Rigosertib Roscovitine Roxadustat \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 \n", "test 25 51 31 36 \n", "train 670 65 657 726 \n", "\n", "condition Rucaparib Ruxolitinib S3I-201 S-Ruxolitinib \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 \n", "test 26 29 34 26 \n", "train 512 789 740 674 \n", "\n", "condition SB431542 SGI-1776 SL-327 SNS-314 SRT1720 SRT2104 \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 0 \n", "test 32 14 33 14 34 52 \n", "train 746 380 578 451 530 748 \n", "\n", "condition SRT3025 Selisistat Sirtinol Sodium Sorafenib \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 \n", "test 25 29 32 26 15 \n", "train 618 756 728 674 372 \n", "\n", "condition Streptozotocin TAK-901 TG101209 TGX-221 TMP195 \\\n", "split_ood_multi_task \n", "ood 0 195 0 0 0 \n", "test 27 0 14 21 23 \n", "train 735 0 300 621 629 \n", "\n", "condition Tacedinaline Tanespimycin Tazemetostat Temsirolimus \\\n", 
"split_ood_multi_task \n", "ood 0 320 0 0 \n", "test 305 0 40 15 \n", "train 389 0 759 376 \n", "\n", "condition Thalidomide Thiotepa Tie2 Tofacitinib Toremifene \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 \n", "test 27 30 20 24 13 \n", "train 668 660 707 726 362 \n", "\n", "condition Tozasertib Trametinib Tranylcypromine Triamcinolone \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 \n", "test 19 139 30 37 \n", "train 440 328 768 767 \n", "\n", "condition Trichostatin Tubastatin Tucidinostat UNC0379 \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 \n", "test 212 22 244 19 \n", "train 280 738 265 420 \n", "\n", "condition UNC0631 UNC1999 Valproic Vandetanib Veliparib \\\n", "split_ood_multi_task \n", "ood 0 0 0 0 0 \n", "test 32 26 30 16 24 \n", "train 704 674 704 318 605 \n", "\n", "condition WHI-P154 WP1066 XAV-939 YM155 ZM Zileuton control \n", "split_ood_multi_task \n", "ood 0 0 0 0 0 0 0 \n", "test 30 18 10 45 16 29 1132 \n", "train 630 710 397 51 502 670 11872 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(adata_sciplex.obs['split_ood_multi_task'], adata_sciplex.obs['condition'])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cell_typeA549K562MCF7
split_baseline_K562
ood05430
test310823866630
train269412652754192
\n", "
" ], "text/plain": [ "cell_type A549 K562 MCF7\n", "split_baseline_K562 \n", "ood 0 543 0\n", "test 3108 2386 6630\n", "train 26941 26527 54192" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Quick check that everything is correct\n",
"\n",
"cell_type = 'K562'\n",
"\n",
"# Every split column may keep 'ood' only for cells of its held-out cell type\n",
"for held_out in adata_sciplex.obs.cell_type.unique():\n",
"    ood_cell_types = adata_sciplex.obs.loc[adata_sciplex.obs[f'split_baseline_{held_out}'] == 'ood', 'cell_type']\n",
"    assert (ood_cell_types == held_out).all(), f'ood cells outside of {held_out} in split_baseline_{held_out}'\n",
"\n",
"# Display the split vs. cell type table for the cell type chosen above\n",
"pd.crosstab(adata_sciplex.obs[f'split_baseline_{cell_type}'], adata_sciplex.obs['cell_type'])" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# write adata \n", "\n", "adata_sciplex.write(DATA_DIR/'adata_baseline_high_dose.h5ad', compression=\"gzip\")" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.7.12 ('chemical_CPA')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.12" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "45879ff77d613949b37d9f94260a6a718c11df1c0993b072c2b5b60153db7170" } } }, "nbformat": 4, "nbformat_minor": 2 }