akadhim committed on
Commit
dc4a259
·
verified ·
1 Parent(s): 2875c81

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -28
app.py CHANGED
@@ -3,35 +3,18 @@ import sqlite3
3
  import json
4
  import numpy as np
5
  from numpy.linalg import norm
6
- from huggingface_hub import hf_hub_download, snapshot_download
7
  from sentence_transformers import SentenceTransformer
8
  import os
9
- import logging
10
-
11
- # Set up logging in a hidden directory
12
- log_dir = os.path.expanduser("~/.logs")
13
- os.makedirs(log_dir, exist_ok=True)
14
- log_file = os.path.join(log_dir, "hpo_mapper.log")
15
-
16
- logging.basicConfig(
17
- filename=log_file,
18
- level=logging.INFO,
19
- format="%(asctime)s - %(levelname)s - %(message)s"
20
- )
21
 
22
  # Get Hugging Face Token from Environment Variables
23
  HF_TOKEN = os.environ.get("HF_TOKEN")
24
  if not HF_TOKEN:
25
  raise ValueError("Missing Hugging Face API token. Please set HF_TOKEN as an environment variable in Hugging Face Secrets.")
26
 
27
- # Load the Nomic-Embed Model from Hugging Face with forced download
28
  EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1.5"
29
- try:
30
- model_path = snapshot_download(EMBEDDING_MODEL, trust_remote_code=True, force_download=True)
31
- embedder = SentenceTransformer(model_path, trust_remote_code=True)
32
- except Exception as e:
33
- logging.error(f"Failed to load model: {e}")
34
- raise
35
 
36
  # Download database from Hugging Face Datasets if it does not exist
37
  db_filename = "hpo_genes.db"
@@ -41,6 +24,7 @@ db_path = os.path.join(os.getcwd(), db_filename)
41
  if not os.path.exists(db_path):
42
  db_path = hf_hub_download(repo_id=db_repo, filename=db_filename, repo_type="dataset", use_auth_token=HF_TOKEN)
43
 
 
44
  def find_best_hpo_match(finding, region, threshold):
45
  query_text = f"{finding} in {region}" if region else finding
46
  query_embedding = embedder.encode(query_text)
@@ -62,6 +46,7 @@ def find_best_hpo_match(finding, region, threshold):
62
  conn.close()
63
  return best_match if best_score >= threshold else None
64
 
 
65
  def get_genes_for_hpo(hpo_id):
66
  conn = sqlite3.connect(db_path)
67
  cursor = conn.cursor()
@@ -70,6 +55,7 @@ def get_genes_for_hpo(hpo_id):
70
  conn.close()
71
  return result[0].split(", ") if result else []
72
 
 
73
  def hpo_mapper_ui(finding, region, threshold):
74
  if not finding:
75
  return "Please enter a pathological finding.", "", ""
@@ -78,19 +64,17 @@ def hpo_mapper_ui(finding, region, threshold):
78
 
79
  if match:
80
  genes = get_genes_for_hpo(match["hpo_id"])
81
- output = (match["hpo_id"], match["hpo_term"], ", ".join(genes))
82
- logging.info(f"Input: Finding='{finding}', Region='{region}', Threshold={threshold} -> Output: {output}")
83
- return output
84
-
85
- logging.info(f"Input: Finding='{finding}', Region='{region}', Threshold={threshold} -> No match found.")
86
  return "No match found.", "", ""
87
 
 
88
  demo = gr.Interface(
89
  fn=hpo_mapper_ui,
90
  inputs=[
91
  gr.Textbox(label="Pathological Finding"),
92
  gr.Textbox(label="Anatomical Region (optional)"),
93
- gr.Slider(0.5, 1.0, step=0.01, value=0.74, label="Similarity Threshold")
94
  ],
95
  outputs=[
96
  gr.Textbox(label="HPO ID"),
@@ -101,9 +85,17 @@ demo = gr.Interface(
101
  description=(
102
  'Enter a pathological finding (e.g., "chronic inflammation") and anatomical region '
103
  '(e.g., "terminal ileum") to map it to the closest Human Phenotype Ontology (HPO) '
104
- 'term and retrieve genes annotated as being associated with this HPO term.'
 
 
 
 
 
 
 
 
105
  )
106
  )
107
 
108
  if __name__ == "__main__":
109
- demo.launch()
 
3
  import json
4
  import numpy as np
5
  from numpy.linalg import norm
6
+ from huggingface_hub import hf_hub_download
7
  from sentence_transformers import SentenceTransformer
8
  import os
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  # Get Hugging Face Token from Environment Variables
11
  HF_TOKEN = os.environ.get("HF_TOKEN")
12
  if not HF_TOKEN:
13
  raise ValueError("Missing Hugging Face API token. Please set HF_TOKEN as an environment variable in Hugging Face Secrets.")
14
 
15
+ # Load the Nomic-Embed Model from Hugging Face
16
  EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1.5"
17
+ embedder = SentenceTransformer(EMBEDDING_MODEL, trust_remote_code=True)
 
 
 
 
 
18
 
19
  # Download database from Hugging Face Datasets if it does not exist
20
  db_filename = "hpo_genes.db"
 
24
  if not os.path.exists(db_path):
25
  db_path = hf_hub_download(repo_id=db_repo, filename=db_filename, repo_type="dataset", use_auth_token=HF_TOKEN)
26
 
27
+
28
  def find_best_hpo_match(finding, region, threshold):
29
  query_text = f"{finding} in {region}" if region else finding
30
  query_embedding = embedder.encode(query_text)
 
46
  conn.close()
47
  return best_match if best_score >= threshold else None
48
 
49
+
50
  def get_genes_for_hpo(hpo_id):
51
  conn = sqlite3.connect(db_path)
52
  cursor = conn.cursor()
 
55
  conn.close()
56
  return result[0].split(", ") if result else []
57
 
58
+
59
  def hpo_mapper_ui(finding, region, threshold):
60
  if not finding:
61
  return "Please enter a pathological finding.", "", ""
 
64
 
65
  if match:
66
  genes = get_genes_for_hpo(match["hpo_id"])
67
+ return match["hpo_id"], match["hpo_term"], ", ".join(genes)
68
+
 
 
 
69
  return "No match found.", "", ""
70
 
71
+
72
  demo = gr.Interface(
73
  fn=hpo_mapper_ui,
74
  inputs=[
75
  gr.Textbox(label="Pathological Finding"),
76
  gr.Textbox(label="Anatomical Region (optional)"),
77
+ gr.Slider(0.0, 1.0, step=0.01, value=0.76, label="Similarity Threshold")
78
  ],
79
  outputs=[
80
  gr.Textbox(label="HPO ID"),
 
85
  description=(
86
  'Enter a pathological finding (e.g., "chronic inflammation") and anatomical region '
87
  '(e.g., "terminal ileum") to map it to the closest Human Phenotype Ontology (HPO) '
88
+ 'term and retrieve genes annotated as being associated with this HPO term.\n\n'
89
+ '**References:**\n'
90
+ 'Kadhim, A. Z., Green, Z., Nazari, I., Baker, J., George, M., Heinson, A., Stammers, M., Kipps, C., Beattie, R. M., Ashton, J. J., & Ennis, S. (2025).\n'
91
+ 'Application of generative artificial intelligence to utilise unstructured clinical data for acceleration of inflammatory bowel disease research.\n'
92
+ '*medRxiv*. [https://doi.org/10.1101/2025.03.07.25323569](https://doi.org/10.1101/2025.03.07.25323569)\n\n'
93
+ 'Gargano, M. A., Matentzoglu, N., Coleman, B., Addo-Lartey, E. B., Anagnostopoulos, A. V., Anderton, J., Avillach, P., Bagley, A. M., Bakštein, E., Balhoff, J. P., Baynam, G., Bello, S. M., Berk, M., Bertram, H., Bishop, S., Blau, H., Bodenstein, D. F., Botas, P., Boztug, K., Čady, J., … Robinson, P. N. (2024)\n'
94
+ 'The Human Phenotype Ontology in 2024: phenotypes around the world.\n'
95
+ '*Nucleic Acids Research* [https://doi.org/10.1093/nar/gkad1005](https://doi.org/10.1093/nar/gkad1005)\n\n'
96
+ 'HPO to gene mappings obtained from [Jax](https://hpo.jax.org/data/annotations)'
97
  )
98
  )
99
 
100
  if __name__ == "__main__":
101
+ demo.launch()