Spaces:

UoS-HGIG
/

HPO_Mapper

Running

App Files Files Community

parsboy1987 committed on Mar 12

Commit

a2e895f

verified ·

1 Parent(s): 5aebf41

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -36

app.py CHANGED Viewed

@@ -2,22 +2,28 @@ import gradio as gr
 import sqlite3
 import json
 import numpy as np
-import subprocess  # To run OntoGPT as a CLI command
 from numpy.linalg import norm
 from huggingface_hub import hf_hub_download
 from sentence_transformers import SentenceTransformer
 import os
 # Get Hugging Face Token from Environment Variables
 HF_TOKEN = os.environ.get("HF_TOKEN")
 if not HF_TOKEN:
-    raise ValueError("Missing Hugging Face API token. Please set HF_TOKEN as an environment variable in Hugging Face Secrets.")
-# Load the Nomic-Embed Model from Hugging Face with trust_remote_code=True
 EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1.5"
 embedder = SentenceTransformer(EMBEDDING_MODEL, trust_remote_code=True)
-# Download database from Hugging Face Datasets if not exists
 db_filename = "hpo_genes.db"
 db_repo = "UoS-HGIG/hpo_genes"
 db_path = os.path.join(os.getcwd(), db_filename)
@@ -25,6 +31,7 @@ db_path = os.path.join(os.getcwd(), db_filename)
 if not os.path.exists(db_path):
     db_path = hf_hub_download(repo_id=db_repo, filename=db_filename, repo_type="dataset", use_auth_token=HF_TOKEN)
 def find_best_hpo_match(finding, region, threshold):
     """Finds the best HPO match using semantic similarity."""
     query_text = f"{finding} in {region}"
@@ -45,7 +52,8 @@ def find_best_hpo_match(finding, region, threshold):
             best_match = {"hpo_id": hpo_id, "hpo_term": hpo_name}
     conn.close()
-    return best_match if best_score > threshold else None  # Adjust threshold based on user input
 def get_genes_for_hpo(hpo_id):
     """Retrieves associated genes for a given HPO ID."""
@@ -56,46 +64,35 @@ def get_genes_for_hpo(hpo_id):
     conn.close()
     return result[0].split(", ") if result else []
-def extract_with_ontogpt(finding, region):
-    """Uses OntoGPT CLI to extract ontology terms."""
-    input_text = f"{finding} observed in {region}."
-    try:
-        # Run OntoGPT extraction (modify parameters as needed)
-        result = subprocess.run(
-            ["ontogpt", "extract", "-t", "hpo", "-m", "meta-llama/Llama-3.1-70B-Instruct"],
-            input=input_text,
-            text=True,
-            capture_output=True
-        )
-        return result.stdout.strip()  # Return extracted ontology term
-    except Exception as e:
-        return f"Error running OntoGPT: {str(e)}"
 def get_hpo_for_finding(finding, region, threshold):
-    """Finds the best HPO term and retrieves associated genes, enriched with OntoGPT."""
     hpo_match = find_best_hpo_match(finding, region, threshold)
     if hpo_match:
-        hpo_id = hpo_match["hpo_id"]
-        hpo_match["genes"] = get_genes_for_hpo(hpo_id)
-        # Use OntoGPT to refine the mapping
-        enriched_description = extract_with_ontogpt(finding, region)
-        hpo_match["description"] = enriched_description
     else:
-        hpo_match = {"hpo_id": "NA", "hpo_term": "NA", "genes": [], "description": "No match found."}
     return hpo_match
 def hpo_mapper_ui(finding, region, threshold):
-    """Function for Gradio UI to get HPO mappings."""
     if not finding or not region:
-        return "Please enter both finding and region.", "", "", ""
-    result = get_hpo_for_finding(finding, region, threshold)
-    return result["hpo_id"], result["hpo_term"], ", ".join(result["genes"]), result["description"]
 # Create Gradio UI
 demo = gr.Interface(
@@ -109,7 +106,7 @@ demo = gr.Interface(
         gr.Textbox(label="HPO ID"),
         gr.Textbox(label="HPO Term"),
         gr.Textbox(label="Associated Genes"),
-        gr.Textbox(label="OntoGPT Description")  # New field for enriched ontology output
     ],
     title="HPO Mapper with OntoGPT",
     description=(
@@ -120,6 +117,7 @@ demo = gr.Interface(
         "Alex Z Kadhim, Zachary Green, Iman Nazari, Jonathan Baker, Michael George, Ashley Heinson, Matt Stammers, Christopher Kipps, R Mark Beattie, James J Ashton, Sarah Ennis\n"
         "medRxiv 2025.03.07.25323569; [DOI: 10.1101/2025.03.07.25323569](https://doi.org/10.1101/2025.03.07.25323569)"
     )
 )
 if __name__ == "__main__":

 import sqlite3
 import json
 import numpy as np
 from numpy.linalg import norm
 from huggingface_hub import hf_hub_download
 from sentence_transformers import SentenceTransformer
 import os
+import subprocess
 # Get Hugging Face Token from Environment Variables
 HF_TOKEN = os.environ.get("HF_TOKEN")
 if not HF_TOKEN:
+    raise ValueError("Missing Hugging Face API token. Please set HF_TOKEN as an environment variable.")
+# Set Hugging Face API key for OntoGPT
+subprocess.run(["runoak", "set-apikey", "-e", "huggingface-key", HF_TOKEN], check=True)
+# Define OntoGPT model
+ONTOGPT_MODEL = "huggingface/WizardLM/WizardCoder-Python-34B-V1.0"
+# Load the Nomic-Embed Model
 EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1.5"
 embedder = SentenceTransformer(EMBEDDING_MODEL, trust_remote_code=True)
+# Download database from Hugging Face if not exists
 db_filename = "hpo_genes.db"
 db_repo = "UoS-HGIG/hpo_genes"
 db_path = os.path.join(os.getcwd(), db_filename)
 if not os.path.exists(db_path):
     db_path = hf_hub_download(repo_id=db_repo, filename=db_filename, repo_type="dataset", use_auth_token=HF_TOKEN)
 def find_best_hpo_match(finding, region, threshold):
     """Finds the best HPO match using semantic similarity."""
     query_text = f"{finding} in {region}"
             best_match = {"hpo_id": hpo_id, "hpo_term": hpo_name}
     conn.close()
+    return best_match if best_score > threshold else None
 def get_genes_for_hpo(hpo_id):
     """Retrieves associated genes for a given HPO ID."""
     conn.close()
     return result[0].split(", ") if result else []
 def get_hpo_for_finding(finding, region, threshold):
     """Look up the best HPO term for a finding and attach its genes.

     Delegates the lookup to ``find_best_hpo_match``. When a match comes
     back, a ``"genes"`` entry (from ``get_genes_for_hpo``) is added to that
     same dict; otherwise a placeholder dict with "NA" identifiers and an
     empty gene list is returned.
     """
     match = find_best_hpo_match(finding, region, threshold)
     # Guard clause: nothing matched, so hand back the placeholder record.
     if not match:
         return {"hpo_id": "NA", "hpo_term": "NA", "genes": []}
     match["genes"] = get_genes_for_hpo(match["hpo_id"])
     return match
def run_ontogpt(finding, region):
    """Run the OntoGPT CLI on a finding/region pair and return its text output.

    Invokes ``ontogpt complete -m ONTOGPT_MODEL -i "<finding> in <region>"``
    as a subprocess (argument list, no shell) and captures its output.

    Args:
        finding: Free-text finding entered by the user.
        region: Free-text region the finding relates to.

    Returns:
        The command's stripped stdout. If the command cannot be launched, or
        exits non-zero without producing any stdout, a string beginning with
        ``"Error running OntoGPT:"`` is returned instead, so the UI shows the
        failure rather than raising or displaying a blank field.
    """
    input_text = f"{finding} in {region}"
    command = ["ontogpt", "complete", "-m", ONTOGPT_MODEL, "-i", input_text]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except OSError as exc:
        # Raised when the ``ontogpt`` executable is missing or not runnable.
        return f"Error running OntoGPT: {exc}"

    output = result.stdout.strip()
    if result.returncode != 0 and not output:
        # Surface the CLI's own diagnostics instead of an empty result.
        details = result.stderr.strip() or f"exit code {result.returncode}"
        return f"Error running OntoGPT: {details}"
    return output
 def hpo_mapper_ui(finding, region, threshold):
     """Gradio callback returning the HPO mapping and OntoGPT output.

     Args:
         finding: Free-text finding entered by the user.
         region: Free-text region entered by the user.
         threshold: Similarity threshold forwarded to the HPO lookup.

     Returns:
         A 4-tuple of strings, one per output textbox: HPO ID, HPO term,
         comma-separated associated genes, and the OntoGPT output. When
         either text input is empty, the first element carries a prompt
         message and the remaining three are empty strings.
     """
     if not finding or not region:
         # Must supply one value per output component (four textboxes);
         # returning fewer values makes the interface fail on this path.
         return "Please enter both finding and region.", "", "", ""
     hpo_result = get_hpo_for_finding(finding, region, threshold)
     ontogpt_output = run_ontogpt(finding, region)
     return (
         hpo_result["hpo_id"],
         hpo_result["hpo_term"],
         ", ".join(hpo_result["genes"]),
         ontogpt_output,
     )
 # Create Gradio UI
 demo = gr.Interface(
         gr.Textbox(label="HPO ID"),
         gr.Textbox(label="HPO Term"),
         gr.Textbox(label="Associated Genes"),
+        gr.Textbox(label="OntoGPT Output")
     ],
     title="HPO Mapper with OntoGPT",
     description=(
         "Alex Z Kadhim, Zachary Green, Iman Nazari, Jonathan Baker, Michael George, Ashley Heinson, Matt Stammers, Christopher Kipps, R Mark Beattie, James J Ashton, Sarah Ennis\n"
         "medRxiv 2025.03.07.25323569; [DOI: 10.1101/2025.03.07.25323569](https://doi.org/10.1101/2025.03.07.25323569)"
     )
 )
 if __name__ == "__main__":