Spaces:

maomlab
/

ToxoCEN-Network

Running

App Files Files Community

maom committed on Feb 15, 2024

Commit

7166938

verified ·

1 Parent(s): 134bbe7

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -16

app.py CHANGED Viewed

@@ -1,28 +1,134 @@
 import streamlit as st
-import pkg_resources
-installed_packages = pkg_resources.working_set
-installed_packages_list = sorted(["%s==%s" % (i.key, i.version)
-   for i in installed_packages])
-print(installed_packages_list)
-st.write(installed_packages_list)
-from streamlit_cytoscapejs import st_cytoscapejs
-elements = [
-    {"data": {"id": "one", "label": "Node 1"}, "position": {"x": 0, "y": 0}},
-    {"data": {"id": "two", "label": "Node 2"}, "position": {"x": 100, "y": 0}},
-    {"data": {"source": "one", "target": "two", "label": "Edge from Node1 to Node2"}},
-]
 stylesheet = [
     {"selector": "node", "style": {"width": 20, "height": 20, "shape": "rectangle"}},
     {"selector": "edge", "style": {"width": 10}},
 ]
-st.title("Hello Cytoscape.js")
 clicked_elements = st_cytoscapejs(elements, stylesheet)
-if clicked_elements is not None:
-    st.write(clicked_elements)

+# ToxoCEN Network: Streamlit app that plots the co-expression neighborhood of a
+# set of seed genes. Seed gene ids come from the `gene_ids` URL query argument
+# (comma separated) or from the text area (one per row); every hit in
+# `top_coexp_hits` from a seed gene with a score above the threshold becomes an
+# edge, and the genes involved become the nodes drawn with Cytoscape.js.
+import numpy as np
+import pandas as pd
+import datasets
 import streamlit as st
+from streamlit_cytoscapejs import st_cytoscapejs
+st.set_page_config(layout='wide')
+# parse out gene_ids from URL query args so it's possible to link to this page
+query_params = st.query_params
+if "gene_ids" in query_params:
+    input_gene_ids = query_params["gene_ids"]
+else:
+    input_gene_ids = "TGME49_231630,TGME49_230210"
+# use "\n" as the separator so it shows correctly in the text area
+input_gene_ids = input_gene_ids.replace(",", "\n")
+st.markdown("""
+# ToxoCEN Network
+**ToxoCEN** is a co-expression network for *Toxoplasma gondii* built on 719 RNA-seq runs across 39 studies.
+A pair of genes are said to be co-expressed when their expression is correlated across different conditions and
+is often a marker for genes to be involved in similar processes.
+To Cite:
+CS Arnold, Y Wang, VB Carruthers, MJ O'Meara
+ToxoCEN: A Co-Expression Network for Toxoplasma gondii
+* Code available at https://github.com/maomlab/CalCEN/tree/master/vignettes/ToxoCEN
+* Full network and dataset: https://huggingface.co/datasets/maomlab/ToxoCEN
+## Plot a network for a set of genes
+Put a ``TGME49_######`` gene_id, one each row to seed the network
+""")
+TGME49_transcript_annotations = datasets.load_dataset(
+    path = "maomlab/ToxoCEN",
+    data_files = {"TGME49_transcript_annotations": "TGME49_transcript_annotations.tsv"})
+TGME49_transcript_annotations = TGME49_transcript_annotations["TGME49_transcript_annotations"].to_pandas()
+top_coexp_hits = datasets.load_dataset(
+    path = "maomlab/ToxoCEN",
+    data_files = {"top_coexp_hits": "top_coexp_hits.tsv"})
+top_coexp_hits = top_coexp_hits["top_coexp_hits"].to_pandas()
+col1, col3, padding = st.columns(spec = [0.2, 0.2, 0.6])
+with col1:
+    input_gene_ids = st.text_area(
+        label = "Gene IDs",
+        value = input_gene_ids,
+        help = "TGME49 Gene IDs e.g. TGME49_231630")
+coexp_score_threshold = 0.85
+##################################
+# Parse and check the user input #
+##################################
+# One gene id per row. Blank rows are skipped and repeated ids collapsed
+# (keeping the order entered) so that every node id is unique in the network.
+seed_gene_ids = list(dict.fromkeys(
+    gene_id.strip() for gene_id in input_gene_ids.splitlines() if gene_id.strip()))
+# Edges: hits from any seed gene whose score clears the threshold. Boolean
+# Series must be combined with element-wise `&`; `and` raises a ValueError.
+# Selecting with isin() also copes with an empty seed list, where
+# pd.concat([]) would raise.
+neighbors = top_coexp_hits[
+    top_coexp_hits.gene_id_1.isin(seed_gene_ids) &
+    (top_coexp_hits.coexp_score > coexp_score_threshold)]
+# Neighbors that are not themselves seeds, sorted for a stable node order
+neighbor_gene_ids = sorted(set(neighbors.gene_id_2) - set(seed_gene_ids))
+gene_ids = seed_gene_ids + neighbor_gene_ids
+gene_types = ['seed'] * len(seed_gene_ids) + ['neighbor'] * len(neighbor_gene_ids)
+# Index the annotations once instead of scanning the table three times per
+# gene; keeping the first row per gene_id matches the previous `.values[0]`.
+# NOTE(review): assumes the annotation table has gene_id, TGME49_id, gene_name
+# and description columns -- confirm against the dataset.
+annotations = TGME49_transcript_annotations.drop_duplicates("gene_id").set_index("gene_id")
+TGME49_ids = []
+gene_names = []
+descriptions = []
+for gene_id in gene_ids:
+    if gene_id in annotations.index:
+        annotation = annotations.loc[gene_id]
+        TGME49_id = annotation["TGME49_id"]
+        gene_name = annotation["gene_name"]
+        description = annotation["description"]
+    else:
+        # Unknown ids are reported but still drawn, labelled by their raw id
+        st.error(f"Unable to locate TGME49_id for Gene ID: {gene_id}, it should be of the form 'TGME49_######'")
+        TGME49_id = None
+        gene_name = None
+        description = None
+    TGME49_ids.append(TGME49_id)
+    gene_names.append(gene_name)
+    descriptions.append(description)
+node_info = pd.DataFrame({
+    "gene_id" : gene_ids,
+    "gene_type" : gene_types,
+    "TGME49_id": TGME49_ids,
+    "gene_name": gene_names,
+    "description": descriptions})
+# Cytoscape elements: one node per gene followed by one edge per hit.
+# Named `elements` because that is what the st_cytoscapejs call below renders.
+elements = []
+for i, (gene_id, gene_name) in enumerate(zip(gene_ids, gene_names)):
+    elements.append({
+        "data": {
+            "id": gene_id,
+            # fall back to the gene id when the name is missing (None or NaN)
+            "label": gene_name if pd.notna(gene_name) else gene_id},
+        "position": {
+            "x" : i * 10,
+            "y" : i * 10}})
+for edge in neighbors.itertuples(index = False):
+    elements.append({
+        "data" : {
+            "source" : edge.gene_id_1,
+            "target" : edge.gene_id_2,
+            "label" : float(edge.coexp_score)}})
+with col3:
+    st.text('') # help alignment with input box
+    st.download_button(
+        label = "Download as TSV",
+        data = neighbors.to_csv(sep ='\t').encode('utf-8'),
+        file_name = "ToxoCEN_network.tsv",
+        mime = "text/tab-separated-values")
+##########################################################
 stylesheet = [
     {"selector": "node", "style": {"width": 20, "height": 20, "shape": "rectangle"}},
     {"selector": "edge", "style": {"width": 10}},
 ]
+st.title("ToxoCEN Network")
 clicked_elements = st_cytoscapejs(elements, stylesheet)