parsboy1987 committed on
Commit
a2e895f
·
verified ·
1 Parent(s): 5aebf41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -36
app.py CHANGED
@@ -2,22 +2,28 @@ import gradio as gr
2
  import sqlite3
3
  import json
4
  import numpy as np
5
- import subprocess # To run OntoGPT as a CLI command
6
  from numpy.linalg import norm
7
  from huggingface_hub import hf_hub_download
8
  from sentence_transformers import SentenceTransformer
9
  import os
 
10
 
11
  # Get Hugging Face Token from Environment Variables
12
  HF_TOKEN = os.environ.get("HF_TOKEN")
13
  if not HF_TOKEN:
14
- raise ValueError("Missing Hugging Face API token. Please set HF_TOKEN as an environment variable in Hugging Face Secrets.")
15
 
16
- # Load the Nomic-Embed Model from Hugging Face with trust_remote_code=True
 
 
 
 
 
 
17
  EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1.5"
18
  embedder = SentenceTransformer(EMBEDDING_MODEL, trust_remote_code=True)
19
 
20
- # Download database from Hugging Face Datasets if not exists
21
  db_filename = "hpo_genes.db"
22
  db_repo = "UoS-HGIG/hpo_genes"
23
  db_path = os.path.join(os.getcwd(), db_filename)
@@ -25,6 +31,7 @@ db_path = os.path.join(os.getcwd(), db_filename)
25
  if not os.path.exists(db_path):
26
  db_path = hf_hub_download(repo_id=db_repo, filename=db_filename, repo_type="dataset", use_auth_token=HF_TOKEN)
27
 
 
28
  def find_best_hpo_match(finding, region, threshold):
29
  """Finds the best HPO match using semantic similarity."""
30
  query_text = f"{finding} in {region}"
@@ -45,7 +52,8 @@ def find_best_hpo_match(finding, region, threshold):
45
  best_match = {"hpo_id": hpo_id, "hpo_term": hpo_name}
46
 
47
  conn.close()
48
- return best_match if best_score > threshold else None # Adjust threshold based on user input
 
49
 
50
  def get_genes_for_hpo(hpo_id):
51
  """Retrieves associated genes for a given HPO ID."""
@@ -56,46 +64,35 @@ def get_genes_for_hpo(hpo_id):
56
  conn.close()
57
  return result[0].split(", ") if result else []
58
 
59
- def extract_with_ontogpt(finding, region):
60
- """Uses OntoGPT CLI to extract ontology terms."""
61
- input_text = f"{finding} observed in {region}."
62
-
63
- try:
64
- # Run OntoGPT extraction (modify parameters as needed)
65
- result = subprocess.run(
66
- ["ontogpt", "extract", "-t", "hpo", "-m", "meta-llama/Llama-3.1-70B-Instruct"],
67
- input=input_text,
68
- text=True,
69
- capture_output=True
70
- )
71
- return result.stdout.strip() # Return extracted ontology term
72
- except Exception as e:
73
- return f"Error running OntoGPT: {str(e)}"
74
 
75
  def get_hpo_for_finding(finding, region, threshold):
76
- """Finds the best HPO term and retrieves associated genes, enriched with OntoGPT."""
77
  hpo_match = find_best_hpo_match(finding, region, threshold)
78
-
79
  if hpo_match:
80
- hpo_id = hpo_match["hpo_id"]
81
- hpo_match["genes"] = get_genes_for_hpo(hpo_id)
82
-
83
- # Use OntoGPT to refine the mapping
84
- enriched_description = extract_with_ontogpt(finding, region)
85
- hpo_match["description"] = enriched_description
86
-
87
  else:
88
- hpo_match = {"hpo_id": "NA", "hpo_term": "NA", "genes": [], "description": "No match found."}
89
-
90
  return hpo_match
91
 
 
 
 
 
 
 
 
 
 
 
92
  def hpo_mapper_ui(finding, region, threshold):
93
- """Function for Gradio UI to get HPO mappings."""
94
  if not finding or not region:
95
- return "Please enter both finding and region.", "", "", ""
 
 
 
96
 
97
- result = get_hpo_for_finding(finding, region, threshold)
98
- return result["hpo_id"], result["hpo_term"], ", ".join(result["genes"]), result["description"]
99
 
100
  # Create Gradio UI
101
  demo = gr.Interface(
@@ -109,7 +106,7 @@ demo = gr.Interface(
109
  gr.Textbox(label="HPO ID"),
110
  gr.Textbox(label="HPO Term"),
111
  gr.Textbox(label="Associated Genes"),
112
- gr.Textbox(label="OntoGPT Description") # New field for enriched ontology output
113
  ],
114
  title="HPO Mapper with OntoGPT",
115
  description=(
@@ -120,6 +117,7 @@ demo = gr.Interface(
120
  "Alex Z Kadhim, Zachary Green, Iman Nazari, Jonathan Baker, Michael George, Ashley Heinson, Matt Stammers, Christopher Kipps, R Mark Beattie, James J Ashton, Sarah Ennis\n"
121
  "medRxiv 2025.03.07.25323569; [DOI: 10.1101/2025.03.07.25323569](https://doi.org/10.1101/2025.03.07.25323569)"
122
  )
 
123
  )
124
 
125
  if __name__ == "__main__":
 
2
  import sqlite3
3
  import json
4
  import numpy as np
 
5
  from numpy.linalg import norm
6
  from huggingface_hub import hf_hub_download
7
  from sentence_transformers import SentenceTransformer
8
  import os
9
+ import subprocess
10
 
11
  # Get Hugging Face Token from Environment Variables
12
  HF_TOKEN = os.environ.get("HF_TOKEN")
13
  if not HF_TOKEN:
14
+ raise ValueError("Missing Hugging Face API token. Please set HF_TOKEN as an environment variable.")
15
 
16
+ # Set Hugging Face API key for OntoGPT
17
+ subprocess.run(["runoak", "set-apikey", "-e", "huggingface-key", HF_TOKEN], check=True)
18
+
19
+ # Define OntoGPT model
20
+ ONTOGPT_MODEL = "huggingface/WizardLM/WizardCoder-Python-34B-V1.0"
21
+
22
+ # Load the Nomic-Embed Model
23
  EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1.5"
24
  embedder = SentenceTransformer(EMBEDDING_MODEL, trust_remote_code=True)
25
 
26
+ # Download database from Hugging Face if not exists
27
  db_filename = "hpo_genes.db"
28
  db_repo = "UoS-HGIG/hpo_genes"
29
  db_path = os.path.join(os.getcwd(), db_filename)
 
31
  if not os.path.exists(db_path):
32
  db_path = hf_hub_download(repo_id=db_repo, filename=db_filename, repo_type="dataset", use_auth_token=HF_TOKEN)
33
 
34
+
35
  def find_best_hpo_match(finding, region, threshold):
36
  """Finds the best HPO match using semantic similarity."""
37
  query_text = f"{finding} in {region}"
 
52
  best_match = {"hpo_id": hpo_id, "hpo_term": hpo_name}
53
 
54
  conn.close()
55
+ return best_match if best_score > threshold else None
56
+
57
 
58
  def get_genes_for_hpo(hpo_id):
59
  """Retrieves associated genes for a given HPO ID."""
 
64
  conn.close()
65
  return result[0].split(", ") if result else []
66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  def get_hpo_for_finding(finding, region, threshold):
69
+ """Finds the best HPO term and retrieves associated genes."""
70
  hpo_match = find_best_hpo_match(finding, region, threshold)
 
71
  if hpo_match:
72
+ hpo_match["genes"] = get_genes_for_hpo(hpo_match["hpo_id"])
 
 
 
 
 
 
73
  else:
74
+ hpo_match = {"hpo_id": "NA", "hpo_term": "NA", "genes": []}
 
75
  return hpo_match
76
 
77
+
78
+ def run_ontogpt(finding, region):
79
+ """Runs OntoGPT to extract information."""
80
+ input_text = f"{finding} in {region}"
81
+ result = subprocess.run([
82
+ "ontogpt", "complete", "-m", ONTOGPT_MODEL, "-i", input_text
83
+ ], capture_output=True, text=True)
84
+ return result.stdout.strip()
85
+
86
+
87
  def hpo_mapper_ui(finding, region, threshold):
88
+ """Function for Gradio UI to get HPO mappings and OntoGPT results."""
89
  if not finding or not region:
90
+ return "Please enter both finding and region.", "", ""
91
+
92
+ hpo_result = get_hpo_for_finding(finding, region, threshold)
93
+ ontogpt_output = run_ontogpt(finding, region)
94
 
95
+ return hpo_result["hpo_id"], hpo_result["hpo_term"], ", ".join(hpo_result["genes"]), ontogpt_output
 
96
 
97
  # Create Gradio UI
98
  demo = gr.Interface(
 
106
  gr.Textbox(label="HPO ID"),
107
  gr.Textbox(label="HPO Term"),
108
  gr.Textbox(label="Associated Genes"),
109
+ gr.Textbox(label="OntoGPT Output")
110
  ],
111
  title="HPO Mapper with OntoGPT",
112
  description=(
 
117
  "Alex Z Kadhim, Zachary Green, Iman Nazari, Jonathan Baker, Michael George, Ashley Heinson, Matt Stammers, Christopher Kipps, R Mark Beattie, James J Ashton, Sarah Ennis\n"
118
  "medRxiv 2025.03.07.25323569; [DOI: 10.1101/2025.03.07.25323569](https://doi.org/10.1101/2025.03.07.25323569)"
119
  )
120
+
121
  )
122
 
123
  if __name__ == "__main__":