Spaces:

maomlab
/

CryptoCEN-ExpressionScatter

Running

App Files Community

maom committed on Jan 29, 2024

Commit

f9b57d0

verified ·

1 Parent(s): 4131518

use estimated_expression_matrix.parquet

Browse files

Files changed (1) hide show

app.py +27 -12

app.py CHANGED Viewed

@@ -35,16 +35,13 @@ estimated_expression_meta = estimated_expression_meta["estimated_expression_meta
 estimated_expression = datasets.load_dataset(
     path = "maomlab/CryptoCEN",
-    data_files = {"estimated_expression": "estimated_expression.tsv"})
 estimated_expression = estimated_expression["estimated_expression"].to_pandas()
 print(f"estimated_expression shape: {estimated_expression.shape}")
-print(f"transcript_annotations are equal: {sum(h99_transcript_annotations['cnag_id'] == estimated_expression.index)}")
-col1, col2, col3 = st.columns(spec = [0.2, 0.2, 0.6])
 with col1:
     gene_id_1 = st.text_input(
         label = "Gene ID 1",
@@ -52,7 +49,6 @@ with col1:
         max_chars = 10,
         help = "CNAG Gene ID e.g. CNAG_04365")
 with col2:
     gene_id_2 = st.text_input(
         label = "Gene ID 2",
@@ -60,20 +56,39 @@ with col2:
         max_chars = 10,
         help = "CNAG Gene ID e.g. CNAG_04222")
 chart_data = pd.DataFrame({
-    "expression_1": np.log10(estimated_expression.loc[h99_transcript_annotations["gene_id"] == gene_id_1].to_numpy()[0] + 1),
-    "expression_2": np.log10(estimated_expression.loc[h99_transcript_annotations["gene_id"] == gene_id_2].to_numpy()[0] + 1),
     "run_accession": estimated_expression.columns,
     "run_accession_meta": estimated_expression_meta["run_accession"],
     "study_accession": estimated_expression_meta["study_accession"]})
 print(f"run_ids are equal: {sum(chart_data['run_accession'] == chart_data['run_accession_meta'])}")
 chart = (
    alt.Chart(chart_data)
    .mark_circle()
-   .encode(x="expression_1", y="expression_2", size=5, color="study_accession", tooltip=["run_accession", "study_accession"]))
-st.altair_chart(chart, use_container_width=True)

 estimated_expression = datasets.load_dataset(
     path = "maomlab/CryptoCEN",
+    data_files = {"estimated_expression_matrix": "estimated_expression_matrix.parquet"})
 estimated_expression = estimated_expression["estimated_expression"].to_pandas()
+#DEBUG
 print(f"estimated_expression shape: {estimated_expression.shape}")
+col1, col2, padding = st.columns(spec = [0.2, 0.2, 0.6])
 with col1:
     gene_id_1 = st.text_input(
         label = "Gene ID 1",
         max_chars = 10,
         help = "CNAG Gene ID e.g. CNAG_04365")
 with col2:
     gene_id_2 = st.text_input(
         label = "Gene ID 2",
         max_chars = 10,
         help = "CNAG Gene ID e.g. CNAG_04222")
+# check the user input
+try:
+    cnag_id_1 = h99_transcript_annotations.loc[h99_transcript_annotations["gene_id"] == gene_id_1]["cnag_id"]
+except:
+    st.error(f"Unable to locate cnag_id for Gene ID 1: {gene_id_1}, it should be of the form 'CNAG_######'")
+try:
+    cnag_id_2 = h99_transcript_annotations.loc[h99_transcript_annotations["gene_id"] == gene_id_2]["cnag_id"]
+except:
+    st.error(f"Unable to locate cnag_id for Gene ID 2: {gene_id_2}, it should be of the form 'CNAG_######'")
 chart_data = pd.DataFrame({
+    "expression_1": np.log10(estimated_expression.loc[estiamted_expression.index == cnag_id_1].to_numpy()[0] + 1),
+    "expression_2": np.log10(estimated_expression.loc[estimated_expression_index == cnag_id_2].to_numpy()[0] + 1),
     "run_accession": estimated_expression.columns,
     "run_accession_meta": estimated_expression_meta["run_accession"],
     "study_accession": estimated_expression_meta["study_accession"]})
+# DEBUG
 print(f"run_ids are equal: {sum(chart_data['run_accession'] == chart_data['run_accession_meta'])}")
 chart = (
    alt.Chart(chart_data)
    .mark_circle()
+   .encode(
+       x="expression_1",
+       y="expression_2",
+       size=5,
+       color="study_accession",
+       tooltip=["run_accession", "study_accession"]))
+st.altair_chart(
+    chart,
+    use_container_width=True)