maom committed on
Commit
7166938
·
verified ·
1 Parent(s): 134bbe7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -16
app.py CHANGED
@@ -1,28 +1,134 @@
1
 
 
 
 
2
  import streamlit as st
 
3
 
4
- import pkg_resources
5
- installed_packages = pkg_resources.working_set
6
- installed_packages_list = sorted(["%s==%s" % (i.key, i.version)
7
- for i in installed_packages])
8
- print(installed_packages_list)
9
 
10
- st.write(installed_packages_list)
 
 
 
 
 
11
 
12
- from streamlit_cytoscapejs import st_cytoscapejs
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- elements = [
15
- {"data": {"id": "one", "label": "Node 1"}, "position": {"x": 0, "y": 0}},
16
- {"data": {"id": "two", "label": "Node 2"}, "position": {"x": 100, "y": 0}},
17
- {"data": {"source": "one", "target": "two", "label": "Edge from Node1 to Node2"}},
18
- ]
19
  stylesheet = [
20
  {"selector": "node", "style": {"width": 20, "height": 20, "shape": "rectangle"}},
21
  {"selector": "edge", "style": {"width": 10}},
22
  ]
23
 
24
- st.title("Hello Cytoscape.js")
25
  clicked_elements = st_cytoscapejs(elements, stylesheet)
26
-
27
- if clicked_elements is not None:
28
- st.write(clicked_elements)
 
1
 
2
+ import numpy as np
3
+ import pandas as pd
4
+ import datasets
5
  import streamlit as st
6
+ from streamlit_cytoscapejs import st_cytoscapejs
7
 
8
+ st.set_page_config(layout='wide')
 
 
 
 
9
 
10
+ # parse out gene_ids from URL query args so it's possible to link to this page
11
+ query_params = st.query_params
12
+ if "gene_ids" in query_params.keys():
13
+ input_gene_ids = query_params["gene_ids"]
14
+ else:
15
+ input_gene_ids = "TGME49_231630,TGME49_230210"
16
 
17
+ # use "\n" as the separator so it shows correctly in the text area
18
+ input_gene_ids = gene_ids.replace(",", "\n")
19
+
20
+
21
+
22
+ st.markdown("""
23
+ # ToxoCEN Network
24
+ **ToxoCEN** is a co-expression network for *Toxoplasma gondii* built on 719 RNA-seq runs across 39 studies.
25
+ A pair of genes are said to be co-expressed when their expression is correlated across different conditions and
26
+ is often a marker for genes to be involved in similar processes.
27
+ To Cite:
28
+ CS Arnold, Y Wang, VB Carruthers, MJ O'Meara
29
+ ToxoCEN: A Co-Expression Network for Toxoplasma gondii
30
+ * Code available at https://github.com/maomlab/CalCEN/tree/master/vignettes/ToxoCEN
31
+ * Full network and dataset: https://huggingface.co/datasets/maomlab/ToxoCEN
32
+ ## Plot a network for a set of genes
33
+ Put a ``TGME49_######`` gene_id, one each row to seed the network
34
+ """)
35
+
36
+ TGME49_transcript_annotations = datasets.load_dataset(
37
+ path = "maomlab/ToxoCEN",
38
+ data_files = {"TGME49_transcript_annotations": "TGME49_transcript_annotations.tsv"})
39
+ TGME49_transcript_annotations = TGME49_transcript_annotations["TGME49_transcript_annotations"].to_pandas()
40
+
41
+ top_coexp_hits = datasets.load_dataset(
42
+ path = "maomlab/ToxoCEN",
43
+ data_files = {"top_coexp_hits": "top_coexp_hits.tsv"})
44
+ top_coexp_hits = top_coexp_hits["top_coexp_hits"].to_pandas()
45
+
46
+
47
+ col1, col3, padding = st.columns(spec = [0.2, 0.2, 0.6])
48
+ with col1:
49
+ input_gene_ids = st.text_area(
50
+ label = "Gene IDs",
51
+ value = f"{input_gene_ids}",
52
+ help = "TGME49 Gene IDs e.g. TGME49_231630")
53
+
54
+ coexp_score_threshold = 0.85
55
+
56
+ ##################################
57
+ # Parse and check the user input #
58
+ ##################################
59
+
60
+ seed_gene_ids = [gene_id.strip() for gene_id in gene_ids.split("\n")]
61
+
62
+ neighbors = []
63
+ for seed_gene_id in seed_gene_ids:
64
+ neighbors.append(
65
+ top_coexp_hits[
66
+ (top_coexp_hits.gene_id_1 == seed_gene_id) and (top_coexp_hits.coexp_score > coexp_score_threshold)])
67
+
68
+ neighbors = pd.concat(neighbors)
69
+
70
+ neighbor_gene_ids = set(neighbors.gene_id_2)
71
+ gene_ids = seed_gene_ids + neighbor_gene_ids
72
+ gene_types = ['seed'] * len(seed_genes) + ['neighbor'] * len(neighbor_gene_ids)
73
+
74
+ TGME49_ids = []
75
+ gene_names = []
76
+ descriptions = []
77
+
78
+ for gene_id in gene_ids:
79
+ try:
80
+ TGME49_id = TGME49_transcript_annotations.loc[TGME49_transcript_annotations["gene_id"] == gene_id_["TGME49_id"].values[0]
81
+ gene_name = TGME49_transcript_annotations.loc[TGME49_transcript_annotations["gene_id"] == gene_id]["gene_name"].values[0]
82
+ description = TGME49_transcript_annotations.loc[TGME49_transcript_annotations["gene_id"] == gene_id]["description"].values[0]
83
+ except:
84
+ st.error(f"Unable to locate TGME49_id for Gene ID: {gene_id}, it should be of the form 'TGME49_######'")
85
+ TGME49_id = None
86
+ gene_name = None
87
+ description = None
88
+
89
+ TGME49_ids.append(TGME49_id)
90
+ gene_names.append(gene_name)
91
+ descriptions.append(description)
92
+
93
+ node_info = pd.DataFrame({
94
+ "gene_id" : gene_ids,
95
+ "gene_type" : gene_types,
96
+ "TGME49_id": TGME49_ids,
97
+ "gene_name": gene_names,
98
+ "description": description})
99
+
100
+ network_data = []
101
+ for i in range(gene_ids):
102
+ network_data.append({
103
+ "data": {
104
+ "id": gene_ids[i],
105
+ "label": gene_name if gene_names[i] is not None else gene_id[i]},
106
+ "position": {
107
+ "x" : i * 10,
108
+ "y" : i * 10}})
109
+ for i in neighbors.shape()[0]:
110
+ edge = neigbors.iloc[i]
111
+ network_data.append({
112
+ "data" : {
113
+ "source" : edge["gene_id_1"],
114
+ "target" : edge["gene_id_2"],
115
+ "label" : edge["coexp_score"]}})
116
+
117
+
118
+ with col3:
119
+ st.text('') # help alignment with input box
120
+ st.download_button(
121
+ label = "Download as as TSV",
122
+ data = neighbors.to_csv(sep ='\t').encode('utf-8'),
123
+ file_name = f"ToxoCEN_network.tsv",
124
+ mime = "text/csv")
125
+
126
+ ##########################################################
127
 
 
 
 
 
 
128
  stylesheet = [
129
  {"selector": "node", "style": {"width": 20, "height": 20, "shape": "rectangle"}},
130
  {"selector": "edge", "style": {"width": 10}},
131
  ]
132
 
133
+ st.title("ToxoCEN Network")
134
  clicked_elements = st_cytoscapejs(elements, stylesheet)