Spaces:

Gla-AI4BioMed-Lab
/

FusionDTI

Running

App Files Files Community

Gla-AI4BioMed-Lab commited on 25 days ago

Commit

b7ce511

verified ·

1 Parent(s): 44cf989

Update app.py

Browse files

Files changed (1) hide show

app.py +363 -277

app.py CHANGED Viewed

@@ -1,36 +1,82 @@
-import os, sys, argparse, tempfile, shutil, base64, io
-from flask import Flask, request, render_template_string
-from werkzeug.utils import secure_filename
-from torch.utils.data import DataLoader
-import selfies
-from rdkit import Chem
-import app as gr
 import torch
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 from matplotlib import cm
 from typing import Optional
-from utils.drug_tokenizer import DrugTokenizer
 from transformers import EsmForMaskedLM, EsmTokenizer, AutoModel
 from utils.metric_learning_models_att_maps import Pre_encoded, FusionDTI
 from utils.foldseek_util import get_struc_seq
-# ───── global paths / args ──────────────────────────────────────
-FOLDSEEK_BIN = shutil.which("foldseek")
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-sys.path.append("..")
 def parse_config():
     p = argparse.ArgumentParser()
-    p.add_argument("-f")
     p.add_argument("--prot_encoder_path", default="westlake-repl/SaProt_650M_AF2")
     p.add_argument("--drug_encoder_path", default="HUBioDataLab/SELFormer")
-    p.add_argument("--agg_mode", default="mean_all_tok", type=str, help="{cls|mean|mean_all_tok}")
     p.add_argument("--group_size", type=int, default=1)
-    p.add_argument("--lr", type=float, default=1e-4)
     p.add_argument("--fusion", default="CAN")
     p.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
     p.add_argument("--save_path_prefix", default="save_model_ckp/")
@@ -40,16 +86,13 @@ def parse_config():
 args = parse_config()
 DEVICE = args.device
-# ───── tokenisers & encoders ────────────────────────────────────
 prot_tokenizer = EsmTokenizer.from_pretrained(args.prot_encoder_path)
 prot_model     = EsmForMaskedLM.from_pretrained(args.prot_encoder_path)
-drug_tokenizer = DrugTokenizer()        # SELFIES
 drug_model     = AutoModel.from_pretrained(args.drug_encoder_path)
-encoding = Pre_encoded(prot_model, drug_model, args).to(DEVICE)
-# ─── collate fn ────────────────────────────────────────────────
 def collate_fn(batch):
     query1, query2, scores = zip(*batch)
@@ -75,20 +118,8 @@ def collate_fn(batch):
     attention_mask2 = query_encodings2["attention_mask"].bool()
     return query_encodings1["input_ids"], attention_mask1, query_encodings2["input_ids"], attention_mask2, scores
-# def collate_fn_batch_encoding(batch):
-def smiles_to_selfies(smiles: str) -> Optional[str]:
-    try:
-        mol = Chem.MolFromSmiles(smiles)
-        if mol is None:
-            return None
-        selfies_str = selfies.encoder(smiles)
-        return selfies_str
-    except Exception:
-        return None
-# ───── single-case embedding ───────────────────────────────────
 def get_case_feature(model, loader):
     model.eval()
     with torch.no_grad():
@@ -100,17 +131,12 @@ def get_case_feature(model, loader):
                      p_ids.cpu(), d_ids.cpu(),
                      p_mask.cpu(), d_mask.cpu(), None)]
-# ───── helper：过滤特殊 token ───────────────────────────────────
-def clean_tokens(ids, tokenizer):
-    toks = tokenizer.convert_ids_to_tokens(ids.tolist())
-    return [t for t in toks if t not in tokenizer.all_special_tokens]
-# ───── visualisation ───────────────────────────────────────────
 def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
     """
     Render a Protein → Drug cross-attention heat-map and, optionally, a
-    Top-20 protein-residue table for a chosen drug-token index.
     The token index shown on the x-axis (and accepted via *drug_idx*) is **the
     position of that token in the *original* drug sequence**, *after* the
@@ -209,8 +235,8 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
         plt.close(fig)
         html = f'<img src="data:image/png;base64,{base64.b64encode(buf.getvalue()).decode()}" />'
-        # ───────────────────── 生成 Top-20 表（若需要） ─────────────────────
-        table_html = ""                   # 先设空串，方便后面统一拼接
         if drug_idx is not None:
             # map original 0-based drug_idx → current column position
             if (drug_idx + 1) in d_indices:
@@ -222,7 +248,7 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
             if col_pos is not None:
                 col_vec = attn[:, col_pos]
-                topk    = torch.topk(col_vec, k=min(20, len(col_vec))).indices.tolist()
                 rank_hdr = "".join(f"<th>{r+1}</th>"         for r in range(len(topk)))
                 res_row  = "".join(f"<td>{p_tokens[i]}</td>" for i in topk)
@@ -230,24 +256,58 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
                 drug_tok_text = d_tokens[col_pos]
                 orig_idx      = d_indices[col_pos]
                 table_html = (
-                    f"<h4 style='margin-bottom:6px'>"
-                    f"Drug token #{orig_idx} <code>{drug_tok_text}</code> "
-                    f"→ Top-20 Protein residues</h4>"
-                    "<table class='tg' style='margin-bottom:8px'>"
-                    f"<tr><th>Rank</th>{rank_hdr}</tr>"
-                    f"<tr><td>Residue</td>{res_row}</tr>"
-                    f"<tr><td>Position</td>{pos_row}</tr>"
-                    "</table>")
-        # ────────────────── 生成可放大 + 可下载的热图 ────────────────────
         buf_png = io.BytesIO()
-        fig.savefig(buf_png, format="png", dpi=140)   # 预览（光栅）
         buf_png.seek(0)
         buf_pdf = io.BytesIO()
-        fig.savefig(buf_pdf, format="pdf")            # 高清下载（矢量）
         buf_pdf.seek(0)
         plt.close(fig)
@@ -255,228 +315,254 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
         pdf_b64 = base64.b64encode(buf_pdf.getvalue()).decode()
         html_heat = (
-            f"<a href='data:image/png;base64,{png_b64}' target='_blank' "
-            f"title='Click to enlarge'>"
-            f"<img src='data:image/png;base64,{png_b64}' "
-            f"style='max-width:100%;height:auto;cursor:zoom-in' /></a>"
-            f"<div style='margin-top:6px'>"
-            f"<a href='data:application/pdf;base64,{pdf_b64}' "
-            f"download='attention_heatmap.pdf'>Download PDF</a></div>"
         )
-        # ───────────────────────── 返回最终 HTML ─────────────────────────
         return table_html + html_heat
-def inference(protein_seq, drug_seq, drug_idx, structure_file):
-    # —— 这一块换成 Gradio 取文件路径 ——
-    if structure_file is not None and os.path.exists(structure_file.name):
-        tmp_structure_path = structure_file.name
-    else:
-        return "<p style='color:red'>请先上传一个有效的 .pdb 或 .cif 文件。</p>"
-    # 调用 foldseek
-    try:
-        parsed = get_struc_seq(FOLDSEEK_BIN, tmp_structure_path, ["A"], plddt_mask=False)
-        chain = next(iter(parsed))
-        protein_seq = parsed[chain][2]
-    except Exception as e:
-        return f"<p style='color:red'>Foldseek 提取失败：{e}</p>"
-# ───── Flask app ───────────────────────────────────────────────
-app = Flask(__name__)
-@app.route("/", methods=["GET", "POST"])
-def index():
-    protein_seq = drug_seq = structure_seq = ""; result_html = None
-    tmp_structure_path = ""; drug_idx = None
-    if request.method == "POST":
-        drug_idx_raw = request.form.get("drug_idx", "")
-        drug_idx = int(drug_idx_raw)-1 if drug_idx_raw.isdigit() else None
-        struct = request.files.get("structure_file")
-        if struct and struct.filename:
-            tmp_dir = tempfile.mkdtemp(prefix="foldseek_")
-            safe_name = secure_filename(struct.filename)
-            tmp_structure_path = os.path.join(tmp_dir, safe_name)
-            struct.save(tmp_structure_path)
-        else:
-            tmp_structure_path = request.form.get("tmp_structure_path", "")
-        if "clear" in request.form:
-            protein_seq = drug_seq = structure_seq = ""; tmp_structure_path = ""
-        elif "confirm_structure" in request.form and tmp_structure_path:
-            try:
-                parsed_seqs = get_struc_seq(FOLDSEEK_BIN, tmp_structure_path, ["A"], plddt_mask=False)["A"]
-                seq, foldseek_seq, structure_seq = parsed_seqs        # 用完后清除目录
-            except Exception as e:
-                result_html = (
-                    "<p style='color:red'><strong>Foldseek failed to extract sequence "
-                    f"from structure: {e}</strong></p>")
-                structure_seq = ""
-            protein_seq = structure_seq
-            drug_input = request.form.get("drug_sequence", "")
-            # Heuristically check if input is SMILES (not starting with [) and convert
-            if not drug_input.strip().startswith("["):
-                converted = smiles_to_selfies(drug_input.strip())
-                if converted:
-                    drug_seq = converted
-                else:
-                    drug_seq = ""
-                    result_html = "<p style='color:red'><strong>Failed to convert SMILES to SELFIES. Please check the input string.</strong></p>"
-            else:
-                drug_seq = drug_input
-        elif "Inference" in request.form:
-            protein_seq = request.form.get("protein_sequence", "")
-            drug_seq    = request.form.get("drug_sequence", "")
-            if protein_seq and drug_seq:
-                loader = DataLoader([(protein_seq, drug_seq, 1)], batch_size=1,
-                                    collate_fn=collate_fn)
-                feats  = get_case_feature(encoding, loader)
-                model  = FusionDTI(446, 768, args).to(DEVICE)
-                ckpt   = os.path.join(f"{args.save_path_prefix}{args.dataset}_{args.fusion}",
-                                      "best_model.ckpt")
-                if os.path.isfile(ckpt):
-                    model.load_state_dict(torch.load(ckpt, map_location=DEVICE))
-                result_html = visualize_attention(model, feats, drug_idx)
-    return render_template_string(
-    # ───────────── HTML (原 UI + 新输入框) ─────────────
-    """
-<!doctype html>
-<html lang="en"><head><meta charset="utf-8"><title>FusionDTI </title>
-<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=Poppins:wght@500;600&display=swap" rel="stylesheet">
-<style>
-:root{--bg:#f3f4f6;--card:#fff;--primary:#6366f1;--primary-dark:#4f46e5;--text:#111827;--border:#e5e7eb;}
-*{box-sizing:border-box;margin:0;padding:0}
-body{background:var(--bg);color:var(--text);font-family:Inter,system-ui,Arial,sans-serif;line-height:1.5;padding:32px 12px;}
-h1{font-family:Poppins,Inter,sans-serif;font-weight:600;font-size:1.7rem;text-align:center;margin-bottom:28px;letter-spacing:-.2px;}
-.card{max-width:1000px;margin:0 auto;background:var(--card);border:1px solid var(--border);
-      border-radius:12px;box-shadow:0 2px 6px rgba(0,0,0,.05);padding:32px 36px;}
-label{font-weight:500;margin-bottom:6px;display:block}
-textarea,input[type=file]{width:100%;font-size:.9rem;font-family:monospace;padding:10px 12px;
-      border:1px solid var(--border);border-radius:8px;background:#fff;resize:vertical;}
-textarea{min-height:90px}
-.btn{appearance:none;border:none;cursor:pointer;padding:12px 22px;border-radius:8px;font-weight:500;
-     font-family:Inter,sans-serif;transition:all .18s ease;color:#fff;}
-.btn-primary{background:var(--primary)}.btn-primary:hover{background:var(--primary-dark)}
-.btn-neutral{background:#9ca3af;}.btn-neutral:hover{background:#6b7280}
-.grid{display:grid;gap:22px}.grid-2{grid-template-columns:1fr 1fr}
-.vis-box{margin-top:28px;border:1px solid var(--border);border-radius:10px;overflow:auto;max-height:72vh;}
-pre{white-space:pre-wrap;word-break:break-all;font-family:monospace;margin-top:8px}
-/* ── tidy table for Top-20 list ─────────────────────────────── */
-table.tg{border-collapse:collapse;margin-top:4px;font-size:0.83rem}
-table.tg th,table.tg td{border:1px solid var(--border);padding:6px 8px;text-align:left}
-table.tg th{background:var(--bg);font-weight:600}
-</style>
-</head>
-<body>
-<h1> Token-level Visualiser for Drug-Target Interaction</h1>
-<!-- ───────────── Project Links (larger + spaced) ───────────── -->
-<div style="margin-top:24px; text-align:center;">
-  <a href="https://zhaohanm.github.io/FusionDTI.github.io/" target="_blank"
-     style="display:inline-block;margin:8px 18px;padding:10px 20px;
-            background:linear-gradient(to right,#10b981,#059669);color:white;
-            font-weight:600;border-radius:8px;font-size:0.9rem;
-            font-family:Inter,sans-serif;text-decoration:none;
-            box-shadow:0 2px 6px rgba(0,0,0,0.12);transition:all 0.2s ease-in-out;"
-     onmouseover="this.style.opacity='0.9'" onmouseout="this.style.opacity='1'">
-    🌐 Project Page
-  </a>
-  <a href="https://arxiv.org/abs/2406.01651" target="_blank"
-     style="display:inline-block;margin:8px 18px;padding:10px 20px;
-            background:linear-gradient(to right,#ef4444,#dc2626);color:white;
-            font-weight:600;border-radius:8px;font-size:0.9rem;
-            font-family:Inter,sans-serif;text-decoration:none;
-            box-shadow:0 2px 6px rgba(0,0,0,0.12);transition:all 0.2s ease-in-out;"
-     onmouseover="this.style.opacity='0.9'" onmouseout="this.style.opacity='1'">
-    📄 ArXiv: 2406.01651
-  </a>
-  <a href="https://github.com/ZhaohanM/FusionDTI" target="_blank"
-     style="display:inline-block;margin:8px 18px;padding:10px 20px;
-            background:linear-gradient(to right,#3b82f6,#2563eb);color:white;
-            font-weight:600;border-radius:8px;font-size:0.9rem;
-            font-family:Inter,sans-serif;text-decoration:none;
-            box-shadow:0 2px 6px rgba(0,0,0,0.12);transition:all 0.2s ease-in-out;"
-     onmouseover="this.style.opacity='0.9'" onmouseout="this.style.opacity='1'">
-    💻 GitHub Repo
-  </a>
-</div>
-<!-- ─────────────  Guidelines for Use  ───────────── -->
-<div class="card" style="margin-bottom:24px">
-  <h2 style="font-size:1.2rem;margin-bottom:14px">Guidelines for Use</h2>
-  <ul style="margin-left:18px;line-height:1.55;list-style:decimal;">
-    <li><strong>Convert protein structure into a structure-aware sequence:</strong>
-        Upload a <code>.pdb</code> or <code>.cif</code> file. A structure-aware
-        sequence will be generated using
-        <a href="https://github.com/steineggerlab/foldseek" target="_blank">Foldseek</a>,
-        based on 3D structures from
-        <a href="https://alphafold.ebi.ac.uk" target="_blank">AlphaFold&nbsp;DB</a> or the
-        <a href="https://www.rcsb.org" target="_blank">Protein Data Bank (PDB)</a>.</li>
-    <li><strong>If you only have an amino acid sequence or a UniProt ID,</strong>
-        you must first visit the
-        <a href="https://www.rcsb.org" target="_blank">Protein Data Bank (PDB)</a>
-        or <a href="https://alphafold.ebi.ac.uk" target="_blank">AlphaFold&nbsp;DB</a>
-        to search and download the corresponding <code>.cif</code> or <code>.pdb</code> file.</li>
-    <li><strong>Drug input supports both SELFIES and SMILES:</strong><br>
-        You can enter a SELFIES string directly, or paste a SMILES string.
-        SMILES will be automatically converted to SELFIES using
-        <a href="https://github.com/aspuru-guzik-group/selfies" target="_blank">SELFIES encoder</a>.
-        If conversion fails, a red error message will be displayed.</li>
-    <li>Optionally enter a <strong>1-based</strong> drug atom or substructure index
-        to highlight the Top-10 interacting protein residues.</li>
-    <li>After inference, you can use the
-        “Download PDF” link to export a high-resolution vector version.</li>
-  </ul>
-</div>
-<div class="card">
-<form method="POST" enctype="multipart/form-data" class="grid">
-  <div><label>Protein Structure (.pdb / .cif)</label>
-       <input type="file" name="structure_file">
-       <input type="hidden" name="tmp_structure_path" value="{{ tmp_structure_path }}"></div>
-  <div><label>Protein Sequence</label>
-       <textarea name="protein_sequence" placeholder="Confirm / paste sequence…">{{ protein_seq }}</textarea></div>
-  <div><label>Drug Sequence (SELFIES/SMILES)</label>
-       <textarea name="drug_sequence" placeholder="[C][C][O]/cco …">{{ drug_seq }}</textarea></div>
-    <label>Drug atom/substructure index (1-based) – show Top-10 related protein residue</label>
-        <input type="number" name="drug_idx" min="1" style="width:120px">
-  <div class="grid grid-2">
-    <button class="btn btn-primary" type="Inference" name="confirm_structure">Confirm Structure</button>
-    <button class="btn btn-primary" type="Inference" name="Inference">Inference</button>
-  </div>
-  <button class="btn btn-neutral" style="width:100%" type="Inference" name="clear">Clear</button>
-</form>
-{% if structure_seq %}
-  <div style="margin-top:18px"><strong>Structure-aware sequence:</strong><pre>{{ structure_seq }}</pre></div>
-{% endif %}
-{% if result_html %}
-  <div class="vis-box" style="margin-top:26px">{{ result_html|safe }}</div>
-{% endif %}
-</div></body></html>
-    """,
-    protein_seq=protein_seq, drug_seq=drug_seq, structure_seq=structure_seq,
-    result_html=result_html, tmp_structure_path=tmp_structure_path)
-# ───── run ─────────────────────────────────────────────────────
 if __name__ == "__main__":
-    app.run(debug=True, host="0.0.0.0", port=7860)

+# ─── monkey-patch gradio_client so bool schemas don’t crash json_schema_to_python_type ───
+import gradio_client.utils as _gc_utils
+# back up originals
+_orig_get_type              = _gc_utils.get_type
+_orig_json2py              = _gc_utils._json_schema_to_python_type
+def _patched_get_type(schema):
+    """Call gradio_client's original ``get_type``, substituting ``{}`` for a boolean schema.
+
+    Any non-boolean ``schema`` is forwarded untouched.
+    """
+    normalised = {} if isinstance(schema, bool) else schema
+    return _orig_get_type(normalised)
+def _patched_json_schema_to_python_type(schema, defs=None):
+    """Call the original ``_json_schema_to_python_type``, substituting ``{}`` for a boolean schema.
+
+    ``defs`` is passed through unchanged; any non-boolean ``schema`` is
+    forwarded untouched.
+    """
+    normalised = {} if isinstance(schema, bool) else schema
+    return _orig_json2py(normalised, defs)
+_gc_utils.get_type                    = _patched_get_type
+_gc_utils._json_schema_to_python_type = _patched_json_schema_to_python_type
+# ─── now it’s safe to import Gradio and build your interface ───────────────────────────
+import gradio as gr
+import os
+import sys
+import argparse
+import tempfile
+import shutil
+import base64
+import io
 import torch
+import selfies
+from rdkit import Chem
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 from matplotlib import cm
 from typing import Optional
 from transformers import EsmForMaskedLM, EsmTokenizer, AutoModel
+from torch.utils.data import DataLoader
+from Bio.PDB import PDBParser, MMCIFParser
+from Bio.Data import IUPACData
+from utils.drug_tokenizer import DrugTokenizer
 from utils.metric_learning_models_att_maps import Pre_encoded, FusionDTI
 from utils.foldseek_util import get_struc_seq
+# ───── Helpers ─────────────────────────────────────────────────
+three2one = {k.upper(): v for k, v in IUPACData.protein_letters_3to1.items()}
+three2one.update({"MSE": "M", "SEC": "C", "PYL": "K"})
+def simple_seq_from_structure(path: str) -> str:
+    """Return the one-letter amino-acid sequence of the longest chain in a structure file.
+
+    Args:
+        path: Path to a structure file. A ``.cif`` suffix (any letter case)
+            selects ``MMCIFParser``; every other path is read with ``PDBParser``.
+
+    Returns:
+        The sequence of the chain holding the most residues, or ``""`` when
+        the structure contains no chains. Residues whose name is missing
+        from ``three2one`` are written as ``"X"``.
+    """
+    is_cif = path.lower().endswith(".cif")
+    parser = MMCIFParser(QUIET=True) if is_cif else PDBParser(QUIET=True)
+    structure = parser.get_structure("P", path)
+    chains = list(structure.get_chains())
+    if not chains:
+        return ""
+    chain = max(chains, key=lambda c: len(list(c.get_residues())))
+    letters = []
+    for res in chain:
+        name = res.get_resname().strip().upper()
+        # Hetero records (waters, ligands, ions) have a non-blank first id
+        # field. Keep them only when they are a known modified amino acid
+        # (e.g. MSE); otherwise each water would append a spurious "X".
+        if res.id[0] != " " and name not in three2one:
+            continue
+        letters.append(three2one.get(name, "X"))
+    return "".join(letters)
+def smiles_to_selfies(smiles: str) -> Optional[str]:
+    """Convert a SMILES string to SELFIES, or return ``None`` when that is not possible.
+
+    Args:
+        smiles: Candidate SMILES string.
+
+    Returns:
+        The result of ``selfies.encoder(smiles)``, or ``None`` when
+        ``Chem.MolFromSmiles`` yields ``None`` or either library raises.
+    """
+    try:
+        # MolFromSmiles is used purely as a validity gate; the molecule
+        # object itself is not needed for the conversion.
+        if Chem.MolFromSmiles(smiles) is None:
+            return None
+        return selfies.encoder(smiles)
+    except Exception:
+        # Deliberate best-effort: any conversion error means "not convertible".
+        # Catching Exception (not a bare except) lets KeyboardInterrupt and
+        # SystemExit propagate.
+        return None
 def parse_config():
     p = argparse.ArgumentParser()
     p.add_argument("--prot_encoder_path", default="westlake-repl/SaProt_650M_AF2")
     p.add_argument("--drug_encoder_path", default="HUBioDataLab/SELFormer")
+    p.add_argument("--agg_mode", type=str, default="mean_all_tok")
     p.add_argument("--group_size", type=int, default=1)
     p.add_argument("--fusion", default="CAN")
     p.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
     p.add_argument("--save_path_prefix", default="save_model_ckp/")
 args = parse_config()
 DEVICE = args.device
+# ───── Load models & tokenizers ─────────────────────────────────
 prot_tokenizer = EsmTokenizer.from_pretrained(args.prot_encoder_path)
 prot_model     = EsmForMaskedLM.from_pretrained(args.prot_encoder_path)
+drug_tokenizer = DrugTokenizer()
 drug_model     = AutoModel.from_pretrained(args.drug_encoder_path)
+encoding       = Pre_encoded(prot_model, drug_model, args).to(DEVICE)
 def collate_fn(batch):
     query1, query2, scores = zip(*batch)
     attention_mask2 = query_encodings2["attention_mask"].bool()
     return query_encodings1["input_ids"], attention_mask1, query_encodings2["input_ids"], attention_mask2, scores
 def get_case_feature(model, loader):
     model.eval()
     with torch.no_grad():
                      p_ids.cpu(), d_ids.cpu(),
                      p_mask.cpu(), d_mask.cpu(), None)]
+# ─────────────── visualisation ───────────────────────────────────────────
 def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
     """
     Render a Protein → Drug cross-attention heat-map and, optionally, a
+    Top-30 protein-residue table for a chosen drug-token index.
     The token index shown on the x-axis (and accepted via *drug_idx*) is **the
     position of that token in the *original* drug sequence**, *after* the
         plt.close(fig)
         html = f'<img src="data:image/png;base64,{base64.b64encode(buf.getvalue()).decode()}" />'
+        # ───────────────────── Top-30 table ─────────────────────
+        table_html = ""
         if drug_idx is not None:
             # map original 0-based drug_idx → current column position
             if (drug_idx + 1) in d_indices:
             if col_pos is not None:
                 col_vec = attn[:, col_pos]
+                topk    = torch.topk(col_vec, k=min(30, len(col_vec))).indices.tolist()
                 rank_hdr = "".join(f"<th>{r+1}</th>"         for r in range(len(topk)))
                 res_row  = "".join(f"<td>{p_tokens[i]}</td>" for i in topk)
                 drug_tok_text = d_tokens[col_pos]
                 orig_idx      = d_indices[col_pos]
+                # 1) build the header row: leading “Rank”, then 1…30
+                header_cells = (
+                    "<th style='border:1px solid #ccc; padding:6px; "
+                    "background:#f7f7f7; text-align:center;'>Rank</th>"
+                    + "".join(
+                        f"<th style='border:1px solid #ccc; padding:6px; "
+                        f"background:#f7f7f7; text-align:center'>{r+1}</th>"
+                        for r in range(len(topk))
+                    )
+                )
+                # 2) build the residue row: leading “Residue”, then the residue tokens
+                residue_cells = (
+                    "<th style='border:1px solid #ccc; padding:6px; "
+                    "background:#f7f7f7; text-align:center;'>Residue</th>"
+                    + "".join(
+                        f"<td style='border:1px solid #ccc; padding:6px; "
+                        f"text-align:center'>{p_tokens[i]}</td>"
+                        for i in topk
+                    )
+                )
+                # 3) build the position row: leading “Position”, then the residue positions
+                position_cells = (
+                    "<th style='border:1px solid #ccc; padding:6px; "
+                    "background:#f7f7f7; text-align:center;'>Position</th>"
+                    + "".join(
+                        f"<td style='border:1px solid #ccc; padding:6px; "
+                        f"text-align:center'>{p_indices[i]}</td>"
+                        for i in topk
+                    )
+                )
+                # 4) assemble your table_html
                 table_html = (
+                    f"<h4 style='margin-bottom:12px'>"
+                      f"Drug atom #{orig_idx} <code>{drug_tok_text}</code> → Top-30 Protein residues"
+                    f"</h4>"
+                    f"<table style='border-collapse:collapse; margin:0 auto 24px;'>"
+                      f"<tr>{header_cells}</tr>"
+                      f"<tr>{residue_cells}</tr>"
+                      f"<tr>{position_cells}</tr>"
+                    f"</table>"
+                )
         buf_png = io.BytesIO()
+        fig.savefig(buf_png, format="png", dpi=140)
         buf_png.seek(0)
         buf_pdf = io.BytesIO()
+        fig.savefig(buf_pdf, format="pdf")
         buf_pdf.seek(0)
         plt.close(fig)
         pdf_b64 = base64.b64encode(buf_pdf.getvalue()).decode()
         html_heat = (
+            f"<div style='position: relative; width: 100%;'>"
+              # the PDF button, absolutely positioned
+              f"<a href='data:application/pdf;base64,{pdf_b64}' download='attention_heatmap.pdf' "
+                 "style='position: absolute; top: 12px; right: 12px; "
+                        "background: var(--primary); color: #fff; "
+                        "padding: 8px 16px; border-radius: 6px; "
+                        "font-size: 0.9rem; font-weight: 500; "
+                        "text-decoration: none;'>"
+                "Download PDF"
+              "</a>"
+              # the clickable heat‐map image
+              f"<a href='data:image/png;base64,{png_b64}' target='_blank' title='Click to enlarge'>"
+                f"<img src='data:image/png;base64,{png_b64}' "
+                     "style='display: block; width: 100%; height: auto; cursor: zoom-in;'/>"
+              "</a>"
+            "</div>"
         )
         return table_html + html_heat
+# ───── Gradio Callbacks ─────────────────────────────────────────
+ROOT = os.path.dirname(os.path.abspath(__file__))
+FOLDSEEK_BIN = os.path.join(ROOT, "bin", "foldseek")
+def extract_sequence_cb(structure_file):
+    """Gradio callback: run Foldseek on the uploaded structure and return a sequence for the protein box.
+
+    Args:
+        structure_file: Value of the ``gr.File`` input. Either a plain path
+            string or an object exposing the path as ``.name`` is accepted.
+
+    Returns:
+        ``""`` when nothing usable was uploaded; otherwise the third element
+        of the tuple ``get_struc_seq`` returns for the first parsed chain
+        (presumably the structure-aware sequence — confirm against
+        ``utils.foldseek_util``).
+
+    Raises:
+        gr.Error: If Foldseek fails or returns no chains, so the user sees a
+            readable message instead of an opaque failure.
+    """
+    if isinstance(structure_file, str):
+        path = structure_file
+    else:
+        path = getattr(structure_file, "name", None)
+    if not path or not os.path.exists(path):
+        return ""
+    try:
+        parsed = get_struc_seq(FOLDSEEK_BIN, path, None, plddt_mask=False)
+    except Exception as exc:
+        raise gr.Error(f"Foldseek failed to extract a sequence: {exc}") from exc
+    if not parsed:
+        # next(iter(...)) on an empty mapping would raise a bare StopIteration.
+        raise gr.Error("No protein chain was found in the uploaded structure.")
+    first_chain = next(iter(parsed))
+    _, _, struct_seq = parsed[first_chain]
+    return struct_seq
+def inference_cb(prot_seq, drug_seq, atom_idx):
+    """Gradio callback: encode one protein/drug pair and return the attention visualisation HTML.
+
+    Args:
+        prot_seq: Protein sequence text from the UI (may be ``None`` or blank).
+        drug_seq: Drug string from the UI. Input that does not start with
+            ``"["`` is treated as SMILES and converted with
+            ``smiles_to_selfies``.
+        atom_idx: Optional 1-based drug index; a falsy value (``None`` or 0)
+            means no token is selected.
+
+    Returns:
+        An HTML string: a red error paragraph for invalid input, otherwise
+        the output of ``visualize_attention``.
+    """
+    # Normalise once so None and whitespace-only input hit the friendly
+    # error messages instead of crashing on .strip() or reaching the model.
+    prot_seq = (prot_seq or "").strip()
+    drug_seq = (drug_seq or "").strip()
+    if not prot_seq:
+        return "<p style='color:red'>Please extract or enter a protein sequence first.</p>"
+    if not drug_seq:
+        return "<p style='color:red'>Please enter a drug sequence.</p>"
+    if not drug_seq.startswith("["):
+        conv = smiles_to_selfies(drug_seq)
+        if not conv:
+            return "<p style='color:red'>SMILES→SELFIES conversion failed.</p>"
+        drug_seq = conv
+    loader = DataLoader([(prot_seq, drug_seq, 1)], batch_size=1, collate_fn=collate_fn)
+    feats = get_case_feature(encoding, loader)
+    model = FusionDTI(446, 768, args).to(DEVICE)
+    # NOTE(review): relies on args.dataset, which is not among the argparse
+    # options visible in this diff — confirm it is defined in parse_config.
+    ckpt = os.path.join(f"{args.save_path_prefix}{args.dataset}_{args.fusion}", "best_model.ckpt")
+    # NOTE(review): when the checkpoint is absent the model keeps its freshly
+    # initialised weights and the visualisation is still rendered.
+    if os.path.isfile(ckpt):
+        model.load_state_dict(torch.load(ckpt, map_location=DEVICE))
+    token_idx = int(atom_idx) - 1 if atom_idx else None  # UI is 1-based
+    return visualize_attention(model, feats, token_idx)
+def clear_cb():
+    """Return five reset values for a Gradio ``outputs`` list: ``None, "", "", None, ""``.
+
+    NOTE(review): the Clear button in this file is wired to an inline lambda,
+    not to this function, and the value order here does not line up with that
+    button's ``outputs`` list — confirm the intended order before reusing.
+    """
+    return None, "", "", None, ""
+# ───── Gradio Interface Definition ────────���──────────────────────
+css = """
+:root {
+  --bg: #f3f4f6;
+  --card: #ffffff;
+  --border: #e5e7eb;
+  --primary: #6366f1;
+  --primary-dark: #4f46e5;
+  --text: #111827;
+}
+* { box-sizing: border-box; margin: 0; padding: 0; }
+body { background: var(--bg); color: var(--text); font-family: Inter,system-ui,Arial,sans-serif; }
+h1 { font-family: Poppins,Inter,sans-serif; font-weight: 600; font-size: 2rem; text-align: center; margin: 24px 0; }
+button, .gr-button { font-family: Inter,sans-serif; font-weight: 600; }
+#project-links { text-align: center; margin-bottom: 32px; }
+#project-links .gr-button { margin: 0 8px; min-width: 160px; }
+#project-links .gr-button:nth-child(1) { background: #10b981; }
+#project-links .gr-button:nth-child(2) { background: #ef4444; }
+#project-links .gr-button:nth-child(3) { background: #3b82f6; }
+#project-links .gr-button:hover { opacity: 0.9; }
+.link-btn{display:inline-block;margin:0 8px;padding:10px 20px;border-radius:8px;
+         color:white;font-weight:600;text-decoration:none;box-shadow:0 2px 6px rgba(0,0,0,0.12);
+         transition:all .2s ease-in-out;}
+.link-btn:hover{opacity:.9;}
+.link-btn.project{background:linear-gradient(to right,#10b981,#059669);}
+.link-btn.arxiv  {background:linear-gradient(to right,#ef4444,#dc2626);}
+.link-btn.github {background:linear-gradient(to right,#3b82f6,#2563eb);}
+/* make *all* gradio buttons a bit taller */
+.gr-button { min-height: 10px !important; }
+/* now target just our two big action buttons */
+#extract-btn, #inference-btn {
+    width: 5px !important;
+    min-height: 36px !important;
+    margin-top: 12px !important;
+}
+/* and make clear button full width but shorter */
+#clear-btn {
+    width: 10px  !important;
+    min-height: 36px !important;
+    margin-top: 12px !important;
+}
+#input-card label {
+    font-weight: 600 !important;    /* make the text bold */
+    color: var(--text) !important;  /* use your standard text color */
+}
+.card {
+  background: var(--card);
+  border: 1px solid var(--border);
+  border-radius: 12px;
+  padding: 24px;
+  max-width: 1000px;
+  margin: 0 auto 32px;
+  box-shadow: 0 2px 6px rgba(0,0,0,0.05);
+}
+#guidelines-card h2 {
+  font-size: 1.4rem;
+  margin-bottom: 16px;
+  text-align: center;
+}
+#guidelines-card ol {
+  margin-left: 20px;
+  line-height: 1.6;
+  font-size: 1rem;
+}
+#input-card .gr-row, #input-card .gr-cols {
+  gap: 16px;
+}
+#input-card .gr-button {
+  flex: 1;
+}
+#output-card {
+  padding-top: 0;
+}
+"""
+with gr.Blocks(css=css) as demo:
+    # ───────────── Title ─────────────
+    gr.Markdown("<h1>Token-level Visualiser for Drug-Target Interaction</h1>")
+    # ───────────── Project Links ─────────────
+    gr.Markdown("""
+        <div style="text-align:center;margin-bottom:32px;">
+          <a class="link-btn project" href="https://zhaohanm.github.io/FusionDTI.github.io/" target="_blank">🌐 Project Page</a>
+          <a class="link-btn arxiv"   href="https://arxiv.org/abs/2406.01651"        target="_blank">📄 ArXiv: 2406.01651</a>
+          <a class="link-btn github"  href="https://github.com/ZhaohanM/FusionDTI"    target="_blank">💻 GitHub Repo</a>
+        </div>
+        """)
+    # ───────────── Guidelines Card ─────────────
+    gr.HTML(
+        """
+        <div class="card" style="margin-bottom:24px">
+          <h2 style="font-size:1.2rem;margin-bottom:14px">Guidelines for User</h2>
+          <ul style="font-size:1rem; margin-left:18px;line-height:1.55;list-style:decimal;">
+            <li><strong>Convert protein structure into a structure-aware sequence:</strong>
+                Upload a <code>.pdb</code> or <code>.cif</code> file. A structure-aware
+                sequence will be generated using
+                <a href="https://github.com/steineggerlab/foldseek" target="_blank">Foldseek</a>,
+                based on 3D structures from
+                <a href="https://alphafold.ebi.ac.uk" target="_blank">AlphaFold&nbsp;DB</a> or the
+                <a href="https://www.rcsb.org" target="_blank">Protein Data Bank (PDB)</a>.</li>
+            <li><strong>If you only have an amino acid sequence or a UniProt ID,</strong>
+                you must first visit the
+                <a href="https://www.rcsb.org" target="_blank">Protein Data Bank (PDB)</a>
+                or <a href="https://alphafold.ebi.ac.uk" target="_blank">AlphaFold&nbsp;DB</a>
+                to search and download the corresponding <code>.cif</code> or <code>.pdb</code> file.</li>
+            <li><strong>Drug input supports both SELFIES and SMILES:</strong><br>
+                You can enter a SELFIES string directly, or paste a SMILES string.
+                SMILES will be automatically converted to SELFIES using
+                <a href="https://github.com/aspuru-guzik-group/selfies" target="_blank">SELFIES encoder</a>.
+                If conversion fails, a red error message will be displayed.</li>
+            <li>Optionally enter a <strong>1-based</strong> drug atom or substructure index
+                to highlight the Top-30 interacting protein residues.</li>
+            <li>After inference, you can use the
+                “Download PDF” link to export a high-resolution vector version.</li>
+          </ul>
+        </div>
+        """)
+    # ───────────── Input Card ─────────────
+    with gr.Column(elem_id="input-card", elem_classes="card"):
+        protein_seq = gr.Textbox(
+            label="Protein Structure-aware Sequence",
+            lines=3,
+            elem_id="protein-seq"
+        )
+        drug_seq = gr.Textbox(
+            label="Drug Sequence (SELFIES/SMILES)",
+            lines=3,
+            elem_id="drug-seq"
+        )
+        structure_file = gr.File(
+            label="Upload Protein Structure (.pdb/.cif)",
+            file_types=[".pdb", ".cif"],
+            elem_id="structure-file"
+        )
+        drug_idx = gr.Number(
+            label="Drug atom/substructure index (1-based)",
+            value=None,
+            precision=0,
+            elem_id="drug-idx"
+        )
+    # ───────────── Action Buttons ─────────────
+    with gr.Row(elem_id="action-buttons", equal_height=True):
+        btn_extract = gr.Button(
+            "Extract sequence",
+            variant="primary",
+            elem_id="extract-btn"
+        )
+        btn_infer = gr.Button(
+            "Inference",
+            variant="primary",
+            elem_id="inference-btn"
+        )
+    with gr.Row():
+        clear_btn = gr.Button(
+            "Clear",
+            variant="secondary",
+            elem_classes="full-width",
+            elem_id="clear-btn"
+        )
+    # ───────────── Output Visualization ─────────────
+    output_html  = gr.HTML(elem_id="result-html")
+    # ───────────── Event Wiring ─────────────
+    btn_extract.click(
+        fn=extract_sequence_cb,
+        inputs=[structure_file],
+        outputs=[protein_seq]
+    )
+    btn_infer.click(
+        fn=inference_cb,
+        inputs=[protein_seq, drug_seq, drug_idx],
+        outputs=[output_html]
+    )
+    # Reset every input and the result panel. The returned values are
+    # positional and must follow the order of ``outputs``; the Number and
+    # File components are cleared with None because "" is not a valid
+    # value for them.
+    clear_btn.click(
+        fn=lambda: ("", "", None, "", None),
+        inputs=[],
+        outputs=[protein_seq, drug_seq, drug_idx, output_html, structure_file]
+    )
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)