{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "RzzY3RdcsgYs" }, "source": [ "# Load packages" ] },
{ "cell_type": "code", "execution_count": 1, "metadata": { "executionInfo": { "elapsed": 41539, "status": "ok", "timestamp": 1746109750920, "user": { "displayName": "Ross Giglio", "userId": "00534511557889516608" }, "user_tz": 240 }, "id": "dIqSnIF5rd9C" }, "outputs": [], "source": [ "import ast\n", "import numpy as np\n", "import pandas as pd\n", "import os, errno\n", "import datetime\n", "import uuid\n", "import itertools\n", "import yaml\n", "import subprocess\n", "import scipy.sparse as sp\n", "from datasets import load_dataset\n", "from scipy.sparse import csr_matrix\n", "from scipy.spatial.distance import squareform\n", "from sklearn.decomposition import non_negative_factorization\n", "from sklearn.cluster import KMeans\n", "from sklearn.metrics import silhouette_score\n", "from sklearn.metrics.pairwise import euclidean_distances\n", "from sklearn.utils import sparsefuncs\n", "from scipy.cluster.hierarchy import leaves_list\n", "import matplotlib.pyplot as plt\n", "from multiprocessing import Pool\n", "import scanpy as sc\n", "import anndata as ad\n", "%matplotlib inline\n", "from scipy.io import mmread\n", "from IPython.display import Image\n", "import anndata\n", "import seaborn as sns\n", "import scvi\n", "from scvi.external import MRVI as MrVI" ] },
{ "cell_type": "markdown", "metadata": { "id": "kSIHf3R6sjys" }, "source": [ "# Upload Data" ] },
{ "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "AnnData object with n_obs × n_vars = 67018 × 62710\n", " obs: 'sample', 'drugname_drugconc', 'drug', 'n_cells', 'tscp_count', 'plate', 'Cell_Name_Vevo', 'Cell_ID_Cellosaur'\n", " var: 'gene_id', 'genome'" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load the pseudobulk Tahoe dataset from a local .h5ad file.\n", "# This cell does not download anything: fetch the file first (e.g. with gsutil)\n", "# so that it exists at H5AD_PATH, relative to the working directory.\n", "H5AD_PATH = \"bionemo2/20250213.Tahoe.merged.pseudobulk.public.h5ad\"\n", "f = H5AD_PATH  # alias kept in case later cells still refer to `f`\n", "# Fail early with an actionable message instead of a low-level HDF5 open error.\n", "if not os.path.isfile(H5AD_PATH):\n", "    raise FileNotFoundError(\n", "        H5AD_PATH + \" not found; download it (e.g. with gsutil) before running this cell\"\n", "    )\n", "adata = sc.read_h5ad(H5AD_PATH)\n", "adata" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(67018, 62710)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# (n_obs, n_vars) of the loaded AnnData object\n", "adata.shape" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "drugname_drugconc\n", "[('DMSO_TF', 0.0, 'uM')] 1400\n", "[('Adagrasib', 0.05, 'uM')] 800\n", "[('Afatinib', 5.0, 'uM')] 150\n", "[('Afatinib', 0.5, 'uM')] 150\n", "[('Afatinib', 0.05, 'uM')] 150\n", " ... \n", "[('LY-2584702 (tosylate salt)', 0.5, 'uM')] 49\n", "[('SBI-0640756', 0.5, 'uM')] 48\n", "[('Belzutifan', 0.05, 'uM')] 48\n", "[('Erdafitinib ', 0.5, 'uM')] 46\n", "[('Encorafenib', 0.5, 'uM')] 36\n", "Name: count, Length: 1138, dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of obs rows for each distinct 'drugname_drugconc' value\n", "adata.obs.value_counts('drugname_drugconc')" ] },
{ "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 0 }, "executionInfo": { "elapsed": 61, "status": "ok", "timestamp": 1746109783501, "user": { "displayName": "Ross Giglio", "userId": "00534511557889516608" }, "user_tz": 240 }, "id": "6kxlQtu6s1ff", "outputId": "bf247ee0-ccf7-4b79-f6a7-77e8d0a91f6c" }, "outputs": [ { "data": { "text/html": [ "
\n", " | sample | \n", "drugname_drugconc | \n", "drug | \n", "n_cells | \n", "tscp_count | \n", "plate | \n", "Cell_Name_Vevo | \n", "Cell_ID_Cellosaur | \n", "dose | \n", "cell_drug | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "smp_1975 | \n", "[('8-Hydroxyquinoline', 5.0, 'uM')] | \n", "8-Hydroxyquinoline | \n", "3076 | \n", "6686156.0 | \n", "6 | \n", "A549 | \n", "CVCL_0023 | \n", "5.0 | \n", "A549_8-Hydroxyquinoline | \n", "
1 | \n", "smp_1975 | \n", "[('8-Hydroxyquinoline', 5.0, 'uM')] | \n", "8-Hydroxyquinoline | \n", "1505 | \n", "4744833.0 | \n", "6 | \n", "HS-578T | \n", "CVCL_0332 | \n", "5.0 | \n", "HS-578T_8-Hydroxyquinoline | \n", "
2 | \n", "smp_1975 | \n", "[('8-Hydroxyquinoline', 5.0, 'uM')] | \n", "8-Hydroxyquinoline | \n", "1700 | \n", "2494504.0 | \n", "6 | \n", "HCT15 | \n", "CVCL_0292 | \n", "5.0 | \n", "HCT15_8-Hydroxyquinoline | \n", "
3 | \n", "smp_1975 | \n", "[('8-Hydroxyquinoline', 5.0, 'uM')] | \n", "8-Hydroxyquinoline | \n", "3560 | \n", "8923357.0 | \n", "6 | \n", "HOP62 | \n", "CVCL_1285 | \n", "5.0 | \n", "HOP62_8-Hydroxyquinoline | \n", "
4 | \n", "smp_1975 | \n", "[('8-Hydroxyquinoline', 5.0, 'uM')] | \n", "8-Hydroxyquinoline | \n", "1876 | \n", "4676765.0 | \n", "6 | \n", "SK-MEL-2 | \n", "CVCL_0069 | \n", "5.0 | \n", "SK-MEL-2_8-Hydroxyquinoline | \n", "