Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,28 +1,134 @@
|
|
1 |
|
|
|
|
|
|
|
2 |
import streamlit as st
|
|
|
3 |
|
4 |
-
|
5 |
-
installed_packages = pkg_resources.working_set
|
6 |
-
installed_packages_list = sorted(["%s==%s" % (i.key, i.version)
|
7 |
-
for i in installed_packages])
|
8 |
-
print(installed_packages_list)
|
9 |
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
elements = [
|
15 |
-
{"data": {"id": "one", "label": "Node 1"}, "position": {"x": 0, "y": 0}},
|
16 |
-
{"data": {"id": "two", "label": "Node 2"}, "position": {"x": 100, "y": 0}},
|
17 |
-
{"data": {"source": "one", "target": "two", "label": "Edge from Node1 to Node2"}},
|
18 |
-
]
|
19 |
stylesheet = [
|
20 |
{"selector": "node", "style": {"width": 20, "height": 20, "shape": "rectangle"}},
|
21 |
{"selector": "edge", "style": {"width": 10}},
|
22 |
]
|
23 |
|
24 |
-
st.title("
|
25 |
clicked_elements = st_cytoscapejs(elements, stylesheet)
|
26 |
-
|
27 |
-
if clicked_elements is not None:
|
28 |
-
st.write(clicked_elements)
|
|
|
1 |
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import datasets
|
5 |
import streamlit as st
|
6 |
+
from streamlit_cytoscapejs import st_cytoscapejs
|
7 |
|
8 |
+
st.set_page_config(layout='wide')

# Parse gene_ids out of the URL query args so it's possible to link to this
# page with a comma-separated list of genes already filled in.
query_params = st.query_params
if "gene_ids" in query_params:
    input_gene_ids = query_params["gene_ids"]
else:
    input_gene_ids = "TGME49_231630,TGME49_230210"

# Use "\n" as the separator so each gene id shows on its own row in the text
# area. The replacement must be applied to input_gene_ids: no variable named
# gene_ids exists yet at this point in the script.
input_gene_ids = input_gene_ids.replace(",", "\n")
|
19 |
+
|
20 |
+
|
21 |
+
|
22 |
+
st.markdown("""
|
23 |
+
# ToxoCEN Network
|
24 |
+
**ToxoCEN** is a co-expression network for *Toxoplasma gondii* built on 719 RNA-seq runs across 39 studies.
|
25 |
+
A pair of genes are said to be co-expressed when their expression is correlated across different conditions and
|
26 |
+
is often a marker for genes to be involved in similar processes.
|
27 |
+
To Cite:
|
28 |
+
CS Arnold, Y Wang, VB Carruthers, MJ O'Meara
|
29 |
+
ToxoCEN: A Co-Expression Network for Toxoplasma gondii
|
30 |
+
* Code available at https://github.com/maomlab/CalCEN/tree/master/vignettes/ToxoCEN
|
31 |
+
* Full network and dataset: https://huggingface.co/datasets/maomlab/ToxoCEN
|
32 |
+
## Plot a network for a set of genes
|
33 |
+
Put a ``TGME49_######`` gene_id, one each row to seed the network
|
34 |
+
""")
|
35 |
+
|
36 |
+
@st.cache_data
def _load_toxocen_table(name):
    """Load one TSV table of the maomlab/ToxoCEN dataset as a DataFrame.

    Streamlit re-executes the whole script on every widget interaction, so the
    result is cached to avoid re-reading the dataset on each rerun.

    Args:
        name: table name; the file ``<name>.tsv`` is loaded and exposed as a
            split of the same name.

    Returns:
        pandas.DataFrame with the contents of the table.
    """
    dataset = datasets.load_dataset(
        path="maomlab/ToxoCEN",
        data_files={name: f"{name}.tsv"})
    return dataset[name].to_pandas()


# Per-gene annotations, looked up by "gene_id" further down
TGME49_transcript_annotations = _load_toxocen_table("TGME49_transcript_annotations")

# Pairwise co-expression hits (gene_id_1, gene_id_2, coexp_score)
top_coexp_hits = _load_toxocen_table("top_coexp_hits")
|
45 |
+
|
46 |
+
|
47 |
+
# Minimum co-expression score for a hit to be drawn as a neighbor
coexp_score_threshold = 0.85

# Two narrow columns (input box, download button) plus empty padding
col1, col3, padding = st.columns(spec=[0.2, 0.2, 0.6])

# Text area pre-filled with the gene ids from the URL (or the defaults)
input_gene_ids = col1.text_area(
    label="Gene IDs",
    value=str(input_gene_ids),
    help="TGME49 Gene IDs e.g. TGME49_231630")
|
55 |
+
|
56 |
+
##################################
# Parse and check the user input #
##################################

# One gene id per row of the text area; skip blank rows and repeated ids while
# keeping the order the user typed them in.
seed_gene_ids = list(dict.fromkeys(
    gene_id.strip()
    for gene_id in input_gene_ids.split("\n")
    if gene_id.strip()))

# For each seed gene collect its co-expression hits above the threshold.
# Boolean Series have to be combined with `&`; `and` raises a ValueError.
neighbors = [
    top_coexp_hits[
        (top_coexp_hits.gene_id_1 == seed_gene_id)
        & (top_coexp_hits.coexp_score > coexp_score_threshold)]
    for seed_gene_id in seed_gene_ids]

if neighbors:
    neighbors = pd.concat(neighbors)
else:
    # pd.concat refuses an empty list; fall back to an empty table that still
    # has the expected columns
    neighbors = top_coexp_hits.iloc[0:0]

# Partner genes that are not seeds themselves, de-duplicated in first-seen
# order, so that every gene id ends up in the node list exactly once.
seed_gene_id_set = set(seed_gene_ids)
neighbor_gene_ids = [
    gene_id
    for gene_id in dict.fromkeys(neighbors.gene_id_2)
    if gene_id not in seed_gene_id_set]

gene_ids = seed_gene_ids + neighbor_gene_ids
gene_types = ['seed'] * len(seed_gene_ids) + ['neighbor'] * len(neighbor_gene_ids)
|
73 |
+
|
74 |
+
# Look up the annotation of every gene in the network. The three lists stay
# parallel to gene_ids; genes that cannot be found get None placeholders.
TGME49_ids = []
gene_names = []
descriptions = []

for gene_id in gene_ids:
    # Select the matching annotation row(s) once instead of once per column
    annotation = TGME49_transcript_annotations.loc[
        TGME49_transcript_annotations["gene_id"] == gene_id]
    try:
        TGME49_id = annotation["TGME49_id"].values[0]
        gene_name = annotation["gene_name"].values[0]
        description = annotation["description"].values[0]
    except (IndexError, KeyError):
        # IndexError: no row matches this gene id; KeyError: column missing
        st.error(f"Unable to locate TGME49_id for Gene ID: {gene_id}, it should be of the form 'TGME49_######'")
        TGME49_id = None
        gene_name = None
        description = None

    TGME49_ids.append(TGME49_id)
    gene_names.append(gene_name)
    descriptions.append(description)

# One row per node. "description" takes the full list of descriptions, not the
# scalar left over from the last loop iteration.
node_info = pd.DataFrame({
    "gene_id": gene_ids,
    "gene_type": gene_types,
    "TGME49_id": TGME49_ids,
    "gene_name": gene_names,
    "description": descriptions})
|
99 |
+
|
100 |
+
# Elements handed to the graph component: first one entry per node, then one
# entry per edge.
network_data = []

# Nodes: labelled with the gene name when one is known, otherwise the gene id.
# Positions are simply laid out along a diagonal.
for i, (node_id, node_name) in enumerate(zip(gene_ids, gene_names)):
    network_data.append({
        "data": {
            "id": node_id,
            # pd.isna is True for both None and NaN
            "label": node_id if pd.isna(node_name) else node_name},
        "position": {
            "x": i * 10,
            "y": i * 10}})

# Edges: one per retained co-expression hit, labelled with its score
for source_id, target_id, coexp_score in zip(
        neighbors["gene_id_1"],
        neighbors["gene_id_2"],
        neighbors["coexp_score"]):
    network_data.append({
        "data": {
            "source": source_id,
            "target": target_id,
            "label": coexp_score}})
|
116 |
+
|
117 |
+
|
118 |
+
# Offer the retained co-expression hits as a tab-separated download
with col3:
    st.text('')  # help alignment with input box
    st.download_button(
        label="Download as TSV",
        data=neighbors.to_csv(sep='\t').encode('utf-8'),
        file_name="ToxoCEN_network.tsv",
        # registered media type for tab-separated data
        mime="text/tab-separated-values")
|
125 |
+
|
126 |
+
##########################################################
|
127 |
|
|
|
|
|
|
|
|
|
|
|
128 |
# Cytoscape style rules applied to every node and every edge
stylesheet = [
    {"selector": "node", "style": {"width": 20, "height": 20, "shape": "rectangle"}},
    {"selector": "edge", "style": {"width": 10}},
]

st.title("ToxoCEN Network")

# Draw the node/edge list built above. The demo `elements` list no longer
# exists in this script, so passing it would raise a NameError.
clicked_elements = st_cytoscapejs(network_data, stylesheet)
|
|
|
|
|
|