akadhim committed on
Commit
b71fded
·
verified ·
1 Parent(s): b27db6f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -0
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import ollama
3
+ import sqlite3
4
+ import json
5
+ import numpy as np
6
+ from numpy.linalg import norm
7
+ from huggingface_hub import hf_hub_download
8
+ import os
9
+
10
# Locate the SQLite database: use a copy in the current working directory when
# one exists, otherwise obtain it from the Hugging Face Hub repository.
db_filename = "hpo_genes.db"
db_repo = "UoS-HGIG/hpo_genes"
db_path = os.path.join(os.getcwd(), db_filename)
db_path = (
    db_path
    if os.path.exists(db_path)
    else hf_hub_download(repo_id=db_repo, filename=db_filename)
)
17
+
18
def find_best_hpo_match(finding, region, *, threshold=0.74):
    """Return the HPO term whose stored embedding best matches the query.

    The text ``"{finding} in {region}"`` is embedded with the
    ``nomic-embed-text`` model through ``ollama.embeddings`` and compared, by
    cosine similarity, with every row of the ``hpo_embeddings`` table in the
    SQLite database at ``db_path``.

    Args:
        finding: Finding text used to build the query.
        region: Region text used to build the query.
        threshold: Score the best row must strictly exceed to be returned.
            Defaults to 0.74.

    Returns:
        A dict ``{"hpo_id": ..., "hpo_term": ...}`` for the highest-scoring
        row, or ``None`` when no row scores above ``threshold``.
    """
    query_text = f"{finding} in {region}"
    response = ollama.embeddings(model='nomic-embed-text', prompt=query_text)
    query_embedding = np.array(response["embedding"])

    # The query norm is loop-invariant, so compute it once. A zero-norm query
    # cannot have a defined cosine similarity with anything.
    query_norm = norm(query_embedding)
    if query_norm == 0:
        return None

    best_row, best_score = None, -1

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT hpo_id, hpo_name, embedding FROM hpo_embeddings")

        # Iterate the cursor directly so rows are streamed instead of being
        # loaded into one list with fetchall().
        for hpo_id, hpo_name, embedding_str in cursor:
            hpo_embedding = np.array(json.loads(embedding_str))
            denominator = query_norm * norm(hpo_embedding)
            if denominator == 0:
                # Skip zero vectors rather than dividing by zero (NaN score).
                continue

            similarity = np.dot(query_embedding, hpo_embedding) / denominator
            if similarity > best_score:
                best_score = similarity
                best_row = (hpo_id, hpo_name)
    finally:
        # Close the connection even if decoding or the query raises.
        conn.close()

    if best_row is None or not best_score > threshold:
        return None
    return {"hpo_id": best_row[0], "hpo_term": best_row[1]}
39
+
40
+
41
def get_genes_for_hpo(hpo_id):
    """Return the genes stored for ``hpo_id`` in the ``hpo_gene`` table.

    Args:
        hpo_id: Value matched against the ``hpo_id`` column.

    Returns:
        The ``genes`` column of the first matching row split on ``", "``.
        An empty list is returned when there is no matching row or when the
        stored value is NULL or an empty string.
    """
    conn = sqlite3.connect(db_path)
    try:
        row = conn.execute(
            "SELECT genes FROM hpo_gene WHERE hpo_id = ?", (hpo_id,)
        ).fetchone()
    finally:
        # Close the connection even if the query raises.
        conn.close()

    # A NULL column would fail on .split(), and "" would split to [""].
    if row is None or not row[0]:
        return []
    return row[0].split(", ")
49
+
50
+
51
def get_hpo_for_finding(finding, region):
    """Look up the best HPO match for a finding/region and attach its genes.

    Returns the dict produced by ``find_best_hpo_match`` with a ``"genes"``
    entry added from ``get_genes_for_hpo``. When there is no match, returns a
    placeholder dict whose ``hpo_id`` and ``hpo_term`` are ``"NA"`` and whose
    ``genes`` list is empty.
    """
    match = find_best_hpo_match(finding, region)

    # No usable match: hand back the "NA" placeholder record.
    if not match:
        return {"hpo_id": "NA", "hpo_term": "NA", "genes": []}

    match["genes"] = get_genes_for_hpo(match["hpo_id"])
    return match
59
+
60
+
61
def hpo_mapper_ui(finding, region):
    """Gradio callback: map a finding and region to an HPO term and genes.

    Args:
        finding: Text from the "Finding" input.
        region: Text from the "Region" input.

    Returns:
        A 3-tuple ``(hpo_id, hpo_term, genes)`` where ``genes`` is the gene
        list joined with ``", "``. When either input is missing or contains
        only whitespace, the first element is a prompt asking for both values
        and the other two are empty strings.
    """
    # Normalize first so None and whitespace-only values count as missing
    # instead of being sent on as a query.
    finding = (finding or "").strip()
    region = (region or "").strip()
    if not finding or not region:
        return "Please enter both finding and region.", "", ""

    result = get_hpo_for_finding(finding, region)
    return result["hpo_id"], result["hpo_term"], ", ".join(result["genes"])
68
+
69
# Gradio front end: two text inputs feed hpo_mapper_ui, whose three return
# values fill the three text outputs in order.
_input_fields = [gr.Textbox(label="Finding"), gr.Textbox(label="Region")]
_output_fields = [
    gr.Textbox(label=caption)
    for caption in ("HPO ID", "HPO Term", "Associated Genes")
]

demo = gr.Interface(
    fn=hpo_mapper_ui,
    inputs=_input_fields,
    outputs=_output_fields,
    title="HPO Mapper",
    description="Enter a clinical finding and anatomical region to get the best-matching HPO term and associated genes.",
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()