Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import ollama
|
3 |
+
import sqlite3
|
4 |
+
import json
|
5 |
+
import numpy as np
|
6 |
+
from numpy.linalg import norm
|
7 |
+
from huggingface_hub import hf_hub_download
|
8 |
+
import os
|
9 |
+
|
10 |
+
# Locate the HPO gene database: use a copy in the current working directory
# when one exists, otherwise download it from the Hugging Face Hub.
db_filename = "hpo_genes.db"
db_repo = "UoS-HGIG/hpo_genes"

_local_db = os.path.join(os.getcwd(), db_filename)
db_path = (
    _local_db
    if os.path.exists(_local_db)
    else hf_hub_download(repo_id=db_repo, filename=db_filename)
)
|
17 |
+
|
18 |
+
def find_best_hpo_match(finding, region, threshold=0.74):
    """Return the HPO term whose stored embedding best matches the query.

    The query text ``"{finding} in {region}"`` is embedded with the
    ``nomic-embed-text`` model through ``ollama.embeddings`` and compared, by
    cosine similarity, with every row of the ``hpo_embeddings`` table in the
    SQLite database at ``db_path``.

    Args:
        finding: Clinical finding text.
        region: Anatomical region text.
        threshold: Cosine similarity the best row must strictly exceed to be
            accepted. Defaults to 0.74; adjust as needed.

    Returns:
        A dict ``{"hpo_id": ..., "hpo_term": ...}`` for the best-scoring row,
        or ``None`` when no row scores above ``threshold``.
    """
    query_text = f"{finding} in {region}"
    response = ollama.embeddings(model='nomic-embed-text', prompt=query_text)
    query_embedding = np.array(response["embedding"])

    # The query norm is loop-invariant, so compute it once. A zero-length
    # query vector has no direction and therefore cannot match anything.
    query_norm = norm(query_embedding)
    if query_norm == 0:
        return None

    best_match, best_score = None, -1

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT hpo_id, hpo_name, embedding FROM hpo_embeddings")

        # Iterate the cursor directly instead of materialising every row.
        for hpo_id, hpo_name, embedding_str in cursor:
            hpo_embedding = np.array(json.loads(embedding_str))
            denominator = query_norm * norm(hpo_embedding)
            if denominator == 0:
                # A zero vector has undefined cosine similarity; skip it
                # rather than dividing by zero and producing NaN.
                continue

            similarity = np.dot(query_embedding, hpo_embedding) / denominator
            if similarity > best_score:
                best_score = similarity
                best_match = {"hpo_id": hpo_id, "hpo_term": hpo_name}
    finally:
        # Close the connection even when parsing or the maths above raises.
        conn.close()

    return best_match if best_score > threshold else None
|
39 |
+
|
40 |
+
|
41 |
+
def get_genes_for_hpo(hpo_id, db_file=None):
    """Retrieve the genes associated with a given HPO ID.

    Looks up the ``genes`` column of the first ``hpo_gene`` row whose
    ``hpo_id`` matches and splits it on ``", "``.

    Args:
        hpo_id: HPO identifier to look up.
        db_file: Optional path to the SQLite database. When ``None`` the
            module-level ``db_path`` is used.

    Returns:
        A list of gene strings; empty when there is no matching row or the
        stored value is NULL or an empty string.
    """
    conn = sqlite3.connect(db_path if db_file is None else db_file)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT genes FROM hpo_gene WHERE hpo_id = ?", (hpo_id,))
        result = cursor.fetchone()
    finally:
        # Release the connection even if the query fails.
        conn.close()

    # Guard against a missing row, a NULL column and an empty string, all of
    # which mean "no genes" (a plain split would crash or yield [""]).
    if not result or not result[0]:
        return []
    return result[0].split(", ")
|
49 |
+
|
50 |
+
|
51 |
+
def get_hpo_for_finding(finding, region):
    """Look up the best HPO term for a finding and attach its genes.

    Returns a dict with ``hpo_id``, ``hpo_term`` and ``genes`` keys. When
    ``find_best_hpo_match`` yields nothing, a placeholder record with "NA"
    identifiers and an empty gene list is returned instead.
    """
    match = find_best_hpo_match(finding, region)

    # No acceptable match: hand back the placeholder record.
    if not match:
        return {"hpo_id": "NA", "hpo_term": "NA", "genes": []}

    match["genes"] = get_genes_for_hpo(match["hpo_id"])
    return match
|
59 |
+
|
60 |
+
|
61 |
+
def hpo_mapper_ui(finding, region):
    """Gradio callback that maps a finding and region to an HPO term.

    Args:
        finding: Text from the "Finding" textbox.
        region: Text from the "Region" textbox.

    Returns:
        A 3-tuple ``(hpo_id, hpo_term, genes)`` where ``genes`` is a
        ``", "``-joined string. If either input is missing or blank, the
        first element carries a prompt message and the other two are empty.
    """
    # Normalise first so None and whitespace-only input are rejected just
    # like an empty textbox, instead of being embedded as a meaningless query.
    finding = (finding or "").strip()
    region = (region or "").strip()
    if not finding or not region:
        return "Please enter both finding and region.", "", ""

    result = get_hpo_for_finding(finding, region)
    return result["hpo_id"], result["hpo_term"], ", ".join(result["genes"])
|
68 |
+
|
69 |
+
# Assemble the Gradio UI: two text inputs feeding hpo_mapper_ui, whose three
# return values fill the three output boxes in order.
_input_boxes = [gr.Textbox(label="Finding"), gr.Textbox(label="Region")]
_output_boxes = [
    gr.Textbox(label="HPO ID"),
    gr.Textbox(label="HPO Term"),
    gr.Textbox(label="Associated Genes"),
]

demo = gr.Interface(
    fn=hpo_mapper_ui,
    inputs=_input_boxes,
    outputs=_output_boxes,
    title="HPO Mapper",
    description="Enter a clinical finding and anatomical region to get the best-matching HPO term and associated genes.",
)

if __name__ == "__main__":
    # Launch the app only when executed as a script, not when imported.
    demo.launch()
|