Update app.py
app.py CHANGED
@@ -3,35 +3,18 @@ import sqlite3
 import json
 import numpy as np
 from numpy.linalg import norm
-from huggingface_hub import hf_hub_download
+from huggingface_hub import hf_hub_download
 from sentence_transformers import SentenceTransformer
 import os
-import logging
-
-# Set up logging in a hidden directory
-log_dir = os.path.expanduser("~/.logs")
-os.makedirs(log_dir, exist_ok=True)
-log_file = os.path.join(log_dir, "hpo_mapper.log")
-
-logging.basicConfig(
-    filename=log_file,
-    level=logging.INFO,
-    format="%(asctime)s - %(levelname)s - %(message)s"
-)
 
 # Get Hugging Face Token from Environment Variables
 HF_TOKEN = os.environ.get("HF_TOKEN")
 if not HF_TOKEN:
     raise ValueError("Missing Hugging Face API token. Please set HF_TOKEN as an environment variable in Hugging Face Secrets.")
 
-# Load the Nomic-Embed Model from Hugging Face
+# Load the Nomic-Embed Model from Hugging Face
 EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1.5"
-
-    model_path = snapshot_download(EMBEDDING_MODEL, trust_remote_code=True, force_download=True)
-    embedder = SentenceTransformer(model_path, trust_remote_code=True)
-except Exception as e:
-    logging.error(f"Failed to load model: {e}")
-    raise
+embedder = SentenceTransformer(EMBEDDING_MODEL, trust_remote_code=True)
 
 # Download database from Hugging Face Datasets if it does not exist
 db_filename = "hpo_genes.db"
@@ -41,6 +24,7 @@ db_path = os.path.join(os.getcwd(), db_filename)
 if not os.path.exists(db_path):
     db_path = hf_hub_download(repo_id=db_repo, filename=db_filename, repo_type="dataset", use_auth_token=HF_TOKEN)
 
+
 def find_best_hpo_match(finding, region, threshold):
     query_text = f"{finding} in {region}" if region else finding
     query_embedding = embedder.encode(query_text)
@@ -62,6 +46,7 @@ def find_best_hpo_match(finding, region, threshold):
     conn.close()
     return best_match if best_score >= threshold else None
 
+
 def get_genes_for_hpo(hpo_id):
     conn = sqlite3.connect(db_path)
     cursor = conn.cursor()
@@ -70,6 +55,7 @@ def get_genes_for_hpo(hpo_id):
     conn.close()
     return result[0].split(", ") if result else []
 
+
 def hpo_mapper_ui(finding, region, threshold):
     if not finding:
         return "Please enter a pathological finding.", "", ""
@@ -78,19 +64,17 @@ def hpo_mapper_ui(finding, region, threshold):
 
     if match:
         genes = get_genes_for_hpo(match["hpo_id"])
-
-
-        return output
-
-    logging.info(f"Input: Finding='{finding}', Region='{region}', Threshold={threshold} -> No match found.")
+        return match["hpo_id"], match["hpo_term"], ", ".join(genes)
+
     return "No match found.", "", ""
 
+
 demo = gr.Interface(
     fn=hpo_mapper_ui,
     inputs=[
         gr.Textbox(label="Pathological Finding"),
         gr.Textbox(label="Anatomical Region (optional)"),
-        gr.Slider(0.
+        gr.Slider(0.0, 1.0, step=0.01, value=0.76, label="Similarity Threshold")
     ],
     outputs=[
         gr.Textbox(label="HPO ID"),
@@ -101,9 +85,17 @@ demo = gr.Interface(
     description=(
         'Enter a pathological finding (e.g., "chronic inflammation") and anatomical region '
         '(e.g., "terminal ileum") to map it to the closest Human Phenotype Ontology (HPO) '
-        'term and retrieve genes annotated as being associated with this HPO term
+        'term and retrieve genes annotated as being associated with this HPO term.\n\n'
+        '**References:**\n'
+        'Kadhim, A. Z., Green, Z., Nazari, I., Baker, J., George, M., Heinson, A., Stammers, M., Kipps, C., Beattie, R. M., Ashton, J. J., & Ennis, S. (2025).\n'
+        'Application of generative artificial intelligence to utilise unstructured clinical data for acceleration of inflammatory bowel disease research.\n'
+        '*medRxiv*. [https://doi.org/10.1101/2025.03.07.25323569](https://doi.org/10.1101/2025.03.07.25323569)\n\n'
+        'Gargano, M. A., Matentzoglu, N., Coleman, B., Addo-Lartey, E. B., Anagnostopoulos, A. V., Anderton, J., Avillach, P., Bagley, A. M., Bakštein, E., Balhoff, J. P., Baynam, G., Bello, S. M., Berk, M., Bertram, H., Bishop, S., Blau, H., Bodenstein, D. F., Botas, P., Boztug, K., Čady, J., … Robinson, P. N. (2024)\n'
+        'The Human Phenotype Ontology in 2024: phenotypes around the world.\n'
+        '*Nucleic Acids Research* [https://doi.org/10.1093/nar/gkad1005](https://doi.org/10.1093/nar/gkad1005)\n\n'
+        'HPO to gene mappings obtained from [Jax](https://hpo.jax.org/data/annotations)'
     )
 )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
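The body of find_best_hpo_match is outside the changed hunks, so the similarity computation itself is not visible in this diff. The sketch below is a rough reconstruction of that flow based only on the visible imports (json, numpy, norm, sqlite3), the encode call, and the return statement; the hpo_terms table name, its columns, and the JSON-encoded embedding storage are illustrative assumptions, not taken from the commit.

```python
# Hypothetical reconstruction of the elided matching loop (not from the diff).
# Assumes a table "hpo_terms(hpo_id, hpo_term, embedding)" with embeddings stored as JSON arrays.
import json
import sqlite3
import numpy as np
from numpy.linalg import norm


def find_best_hpo_match_sketch(finding, region, threshold, embedder, db_path):
    # Build the query text the same way the app does and embed it.
    query_text = f"{finding} in {region}" if region else finding
    query_embedding = embedder.encode(query_text)

    conn = sqlite3.connect(db_path)
    cursor = conn.cursor()
    cursor.execute("SELECT hpo_id, hpo_term, embedding FROM hpo_terms")  # assumed schema

    best_match, best_score = None, -1.0
    for hpo_id, hpo_term, embedding_json in cursor.fetchall():
        term_embedding = np.array(json.loads(embedding_json))
        # Cosine similarity between the query and the stored HPO term embedding.
        score = float(
            np.dot(query_embedding, term_embedding)
            / (norm(query_embedding) * norm(term_embedding))
        )
        if score > best_score:
            best_match, best_score = {"hpo_id": hpo_id, "hpo_term": hpo_term}, score

    conn.close()
    return best_match if best_score >= threshold else None
```

Under those assumptions, the returned dictionary carries the "hpo_id" and "hpo_term" keys that hpo_mapper_ui reads, and the threshold argument corresponds to the slider whose default of 0.76 is set in this commit.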