akadhim committed on
Commit
b859fb5
·
verified ·
1 Parent(s): 2a861c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -18
app.py CHANGED
@@ -2,15 +2,26 @@ import gradio as gr
2
  import sqlite3
3
  import json
4
  import numpy as np
5
- import numpy as np
6
  from sentence_transformers import SentenceTransformer
 
7
 
8
  EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1.5"
9
  embedder = SentenceTransformer(EMBEDDING_MODEL, trust_remote_code=True)
10
 
11
- # Ensure the database file exists locally.
12
- db_path = "hpo_genes.db"
 
 
 
 
 
13
 
 
 
 
 
 
14
 
15
  def find_best_hpo(finding, region, threshold):
16
  query = f"{finding} {region}" if region else finding
@@ -18,21 +29,28 @@ def find_best_hpo(finding, region, threshold):
18
 
19
  conn = sqlite3.connect(db_path)
20
  cursor = conn.cursor()
21
- cursor.execute("SELECT hpo_id, hpo_name, embedding FROM hpo_embeddings")
 
 
 
 
 
22
 
23
  best_match, best_score = None, -1
24
 
25
  for hpo_id, hpo_name, embedding_str in cursor.fetchall():
26
  hpo_embedding = np.array(json.loads(embedding_str))
27
- score = np.dot(query_embedding, hpo_embedding)
 
 
28
 
29
- if score > best_score:
30
- best_score = score
31
  best_match = {"hpo_id": hpo_id, "hpo_name": hpo_name}
32
 
 
33
  return best_match if best_score >= threshold else None
34
 
35
-
36
  def get_genes(hpo_id):
37
  conn = sqlite3.connect(db_path)
38
  cursor = conn.cursor()
@@ -41,26 +59,27 @@ def get_genes(hpo_id):
41
  conn.close()
42
  return genes
43
 
44
-
45
  def hpo_mapper(finding, region, threshold):
46
  if not finding:
47
  return "Please provide a pathological finding.", "", ""
48
 
49
- match = find_best_hpo(finding, region, threshold)
 
 
 
50
 
51
  if match:
52
  genes = get_genes(match["hpo_id"])
53
  return match["hpo_id"], match["hpo_name"], ", ".join(genes)
54
 
55
- return "No match found", "", ""
56
-
57
 
58
  demo = gr.Interface(
59
  fn=hpo_mapper,
60
  inputs=[
61
  gr.Textbox(label="Pathological Finding"),
62
  gr.Textbox(label="Anatomical Region (optional)"),
63
- gr.Slider(0.5, 1.0, step=0.01, value=0.74, label="Similarity Threshold")
64
  ],
65
  outputs=[
66
  gr.Textbox(label="HPO ID"),
@@ -73,12 +92,15 @@ demo = gr.Interface(
73
  '(e.g. "terminal ileum") to map it to the closest Human Phenotype Ontology (HPO) '
74
  'term and retrieve associated genes.\n\n'
75
  '**Reference:**\n'
76
- 'Application of Generative Artificial Intelligence to Utilise Unstructured Clinical Data for Acceleration of Inflammatory Bowel Disease Research\n'
77
- 'Alex Z Kadhim, Zachary Green, Iman Nazari, Jonathan Baker, Michael George, Ashley Heinson, Matt Stammers, Christopher M Kipps, R Mark Beattie, James J Ashton, Sarah Ennis\n'
78
- 'medRxiv 2025.03.07.25323569; doi: [https://doi.org/10.1101/2025.03.07.25323569](https://doi.org/10.1101/2025.03.07.25323569)\n\n'
 
 
 
79
  'HPO to gene mappings obtained from [Jax](https://hpo.jax.org/data/annotations)'
80
- )
81
  )
82
 
83
  if __name__ == "__main__":
84
- demo.launch()
 
2
  import sqlite3
3
  import json
4
  import numpy as np
5
+ import os
6
  from sentence_transformers import SentenceTransformer
7
+ from huggingface_hub import hf_hub_download
8
 
9
  EMBEDDING_MODEL = "nomic-ai/nomic-embed-text-v1.5"
10
  embedder = SentenceTransformer(EMBEDDING_MODEL, trust_remote_code=True)
11
 
12
+ HF_TOKEN = os.environ.get("HF_TOKEN")
13
+ if not HF_TOKEN:
14
+ raise ValueError("Missing Hugging Face API token.")
15
+
16
+ db_repo = "UoS-HGIG/hpo_genes"
17
+ db_filename = "hpo_genes.db"
18
+ db_path = db_filename
19
 
20
+ # Download database if not present locally
21
+ if not os.path.exists(db_path):
22
+ db_path = hf_hub_download(
23
+ repo_id=db_repo, filename=db_filename, repo_type="dataset", use_auth_token=HF_TOKEN
24
+ )
25
 
26
  def find_best_hpo(finding, region, threshold):
27
  query = f"{finding} {region}" if region else finding
 
29
 
30
  conn = sqlite3.connect(db_path)
31
  cursor = conn.cursor()
32
+
33
+ try:
34
+ cursor.execute("SELECT hpo_id, hpo_name, embedding FROM hpo_embeddings")
35
+ except sqlite3.OperationalError as e:
36
+ conn.close()
37
+ raise ValueError(f"Database error: {e}")
38
 
39
  best_match, best_score = None, -1
40
 
41
  for hpo_id, hpo_name, embedding_str in cursor.fetchall():
42
  hpo_embedding = np.array(json.loads(embedding_str))
43
+ similarity = np.dot(query_embedding, hpo_embedding) / (
44
+ np.linalg.norm(query_embedding) * np.linalg.norm(hpo_embedding)
45
+ )
46
 
47
+ if similarity > best_score:
48
+ best_score = similarity
49
  best_match = {"hpo_id": hpo_id, "hpo_name": hpo_name}
50
 
51
+ conn.close()
52
  return best_match if best_score >= threshold else None
53
 
 
54
  def get_genes(hpo_id):
55
  conn = sqlite3.connect(db_path)
56
  cursor = conn.cursor()
 
59
  conn.close()
60
  return genes
61
 
 
62
  def hpo_mapper(finding, region, threshold):
63
  if not finding:
64
  return "Please provide a pathological finding.", "", ""
65
 
66
+ try:
67
+ match = find_best_hpo(finding, region, threshold)
68
+ except ValueError as e:
69
+ return str(e), "", ""
70
 
71
  if match:
72
  genes = get_genes(match["hpo_id"])
73
  return match["hpo_id"], match["hpo_name"], ", ".join(genes)
74
 
75
+ return "No match found.", "", ""
 
76
 
77
  demo = gr.Interface(
78
  fn=hpo_mapper,
79
  inputs=[
80
  gr.Textbox(label="Pathological Finding"),
81
  gr.Textbox(label="Anatomical Region (optional)"),
82
+ gr.Slider(0.5, 1.0, step=0.01, value=0.74, label="Similarity Threshold"),
83
  ],
84
  outputs=[
85
  gr.Textbox(label="HPO ID"),
 
92
  '(e.g. "terminal ileum") to map it to the closest Human Phenotype Ontology (HPO) '
93
  'term and retrieve associated genes.\n\n'
94
  '**Reference:**\n'
95
+ 'Application of Generative Artificial Intelligence to Utilise Unstructured Clinical Data '
96
+ 'for Acceleration of Inflammatory Bowel Disease Research\n'
97
+ 'Alex Z Kadhim, Zachary Green, Iman Nazari, Jonathan Baker, Michael George, Ashley Heinson, '
98
+ 'Matt Stammers, Christopher M Kipps, R Mark Beattie, James J Ashton, Sarah Ennis\n'
99
+ 'medRxiv 2025.03.07.25323569; doi: '
100
+ '[https://doi.org/10.1101/2025.03.07.25323569](https://doi.org/10.1101/2025.03.07.25323569)\n\n'
101
  'HPO to gene mappings obtained from [Jax](https://hpo.jax.org/data/annotations)'
102
+ ),
103
  )
104
 
105
  if __name__ == "__main__":
106
+ demo.launch()