Spaces:

Zhaohan-Meng
/

FusionDTI

Running

App Files Files Community

Zhaohan Meng committed on May 17

Commit

44cf989

verified ·

1 Parent(s): cff7f27

Update app.py

Browse files

Files changed (1) hide show

app.py +277 -361

app.py CHANGED Viewed

@@ -1,81 +1,36 @@
-# ─── monkey-patch gradio_client so bool schemas don’t crash json_schema_to_python_type ───
-import gradio_client.utils as _gc_utils
-# back up originals
-_orig_get_type              = _gc_utils.get_type
-_orig_json2py              = _gc_utils._json_schema_to_python_type
-def _patched_get_type(schema):
-    # treat any boolean schema as if it were an empty dict
-    if isinstance(schema, bool):
-        schema = {}
-    return _orig_get_type(schema)
-def _patched_json_schema_to_python_type(schema, defs=None):
-    # treat any boolean schema as if it were an empty dict
-    if isinstance(schema, bool):
-        schema = {}
-    return _orig_json2py(schema, defs)
-_gc_utils.get_type                    = _patched_get_type
-_gc_utils._json_schema_to_python_type = _patched_json_schema_to_python_type
-# ─── now it’s safe to import Gradio and build your interface ───────────────────────────
-import gradio as gr
-import os
-import sys
-import argparse
-import tempfile
-import shutil
-import base64
-import io
-import torch
 import selfies
 from rdkit import Chem
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 from matplotlib import cm
 from typing import Optional
-from transformers import EsmForMaskedLM, EsmTokenizer, AutoModel
-from torch.utils.data import DataLoader
-from Bio.PDB import PDBParser, MMCIFParser
-from Bio.Data import IUPACData
 from utils.drug_tokenizer import DrugTokenizer
 from utils.metric_learning_models_att_maps import Pre_encoded, FusionDTI
 from utils.foldseek_util import get_struc_seq
-# ───── Helpers ─────────────────────────────────────────────────
-three2one = {k.upper(): v for k, v in IUPACData.protein_letters_3to1.items()}
-three2one.update({"MSE": "M", "SEC": "C", "PYL": "K"})
-def simple_seq_from_structure(path: str) -> str:
-    parser = MMCIFParser(QUIET=True) if path.endswith(".cif") else PDBParser(QUIET=True)
-    structure = parser.get_structure("P", path)
-    chains = list(structure.get_chains())
-    if not chains:
-        return ""
-    chain = max(chains, key=lambda c: len(list(c.get_residues())))
-    return "".join(three2one.get(res.get_resname().upper(), "X") for res in chain)
-def smiles_to_selfies(smiles: str) -> Optional[str]:
-    try:
-        mol = Chem.MolFromSmiles(smiles)
-        if mol is None:
-            return None
-        return selfies.encoder(smiles)
-    except Exception:
-        return None
 def parse_config():
     p = argparse.ArgumentParser()
     p.add_argument("--prot_encoder_path", default="westlake-repl/SaProt_650M_AF2")
     p.add_argument("--drug_encoder_path", default="HUBioDataLab/SELFormer")
-    p.add_argument("--agg_mode", type=str, default="mean_all_tok")
     p.add_argument("--group_size", type=int, default=1)
     p.add_argument("--fusion", default="CAN")
     p.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
     p.add_argument("--save_path_prefix", default="save_model_ckp/")
@@ -85,13 +40,16 @@ def parse_config():
 args = parse_config()
 DEVICE = args.device
-# ───── Load models & tokenizers ─────────────────────────────────
 prot_tokenizer = EsmTokenizer.from_pretrained(args.prot_encoder_path)
 prot_model     = EsmForMaskedLM.from_pretrained(args.prot_encoder_path)
-drug_tokenizer = DrugTokenizer()
 drug_model     = AutoModel.from_pretrained(args.drug_encoder_path)
-encoding       = Pre_encoded(prot_model, drug_model, args).to(DEVICE)
 def collate_fn(batch):
     query1, query2, scores = zip(*batch)
@@ -117,8 +75,20 @@ def collate_fn(batch):
     attention_mask2 = query_encodings2["attention_mask"].bool()
     return query_encodings1["input_ids"], attention_mask1, query_encodings2["input_ids"], attention_mask2, scores
 def get_case_feature(model, loader):
     model.eval()
     with torch.no_grad():
@@ -130,12 +100,17 @@ def get_case_feature(model, loader):
                      p_ids.cpu(), d_ids.cpu(),
                      p_mask.cpu(), d_mask.cpu(), None)]
-# ─────────────── visualisation ───────────────────────────────────────────
 def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
     """
     Render a Protein → Drug cross-attention heat-map and, optionally, a
-    Top-30 protein-residue table for a chosen drug-token index.
     The token index shown on the x-axis (and accepted via *drug_idx*) is **the
     position of that token in the *original* drug sequence**, *after* the
@@ -234,8 +209,8 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
         plt.close(fig)
         html = f'<img src="data:image/png;base64,{base64.b64encode(buf.getvalue()).decode()}" />'
-        # ───────────────────── Top-30 tabel ─────────────────────
-        table_html = ""
         if drug_idx is not None:
             # map original 0-based drug_idx → current column position
             if (drug_idx + 1) in d_indices:
@@ -247,7 +222,7 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
             if col_pos is not None:
                 col_vec = attn[:, col_pos]
-                topk    = torch.topk(col_vec, k=min(30, len(col_vec))).indices.tolist()
                 rank_hdr = "".join(f"<th>{r+1}</th>"         for r in range(len(topk)))
                 res_row  = "".join(f"<td>{p_tokens[i]}</td>" for i in topk)
@@ -255,58 +230,24 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
                 drug_tok_text = d_tokens[col_pos]
                 orig_idx      = d_indices[col_pos]
-                # 1) build the header row: leading “Rank”, then 1…30
-                header_cells = (
-                    "<th style='border:1px solid #ccc; padding:6px; "
-                    "background:#f7f7f7; text-align:center;'>Rank</th>"
-                    + "".join(
-                        f"<th style='border:1px solid #ccc; padding:6px; "
-                        f"background:#f7f7f7; text-align:center'>{r+1}</th>"
-                        for r in range(len(topk))
-                    )
-                )
-                # 2) build the residue row: leading “Residue”, then the residue tokens
-                residue_cells = (
-                    "<th style='border:1px solid #ccc; padding:6px; "
-                    "background:#f7f7f7; text-align:center;'>Residue</th>"
-                    + "".join(
-                        f"<td style='border:1px solid #ccc; padding:6px; "
-                        f"text-align:center'>{p_tokens[i]}</td>"
-                        for i in topk
-                    )
-                )
-                # 3) build the position row: leading “Position”, then the residue positions
-                position_cells = (
-                    "<th style='border:1px solid #ccc; padding:6px; "
-                    "background:#f7f7f7; text-align:center;'>Position</th>"
-                    + "".join(
-                        f"<td style='border:1px solid #ccc; padding:6px; "
-                        f"text-align:center'>{p_indices[i]}</td>"
-                        for i in topk
-                    )
-                )
-                # 4) assemble your table_html
-                table_html = (
-                    f"<h4 style='margin-bottom:12px'>"
-                      f"Drug atom #{orig_idx} <code>{drug_tok_text}</code> → Top-30 Protein residues"
-                    f"</h4>"
-                    f"<table style='border-collapse:collapse; margin:0 auto 24px;'>"
-                      f"<tr>{header_cells}</tr>"
-                      f"<tr>{residue_cells}</tr>"
-                      f"<tr>{position_cells}</tr>"
-                    f"</table>"
-                )
         buf_png = io.BytesIO()
-        fig.savefig(buf_png, format="png", dpi=140)
         buf_png.seek(0)
         buf_pdf = io.BytesIO()
-        fig.savefig(buf_pdf, format="pdf")
         buf_pdf.seek(0)
         plt.close(fig)
@@ -314,253 +255,228 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
         pdf_b64 = base64.b64encode(buf_pdf.getvalue()).decode()
         html_heat = (
-            f"<div style='position: relative; width: 100%;'>"
-              # the PDF button, absolutely positioned
-              f"<a href='data:application/pdf;base64,{pdf_b64}' download='attention_heatmap.pdf' "
-                 "style='position: absolute; top: 12px; right: 12px; "
-                        "background: var(--primary); color: #fff; "
-                        "padding: 8px 16px; border-radius: 6px; "
-                        "font-size: 0.9rem; font-weight: 500; "
-                        "text-decoration: none;'>"
-                "Download PDF"
-              "</a>"
-              # the clickable heat‐map image
-              f"<a href='data:image/png;base64,{png_b64}' target='_blank' title='Click to enlarge'>"
-                f"<img src='data:image/png;base64,{png_b64}' "
-                     "style='display: block; width: 100%; height: auto; cursor: zoom-in;'/>"
-              "</a>"
-            "</div>"
         )
         return table_html + html_heat
-# ───── Gradio Callbacks ─────────────────────────────────────────
-ROOT = os.path.dirname(os.path.abspath(__file__))
-FOLDSEEK_BIN = os.path.join(ROOT, "bin", "foldseek")
-def extract_sequence_cb(structure_file):
-    if structure_file is None or not os.path.exists(structure_file.name):
-        return ""
-    parsed = get_struc_seq(FOLDSEEK_BIN, structure_file.name, None, plddt_mask=False)
-    first_chain = next(iter(parsed))
-    _, _, struct_seq = parsed[first_chain]
-    return struct_seq
-def inference_cb(prot_seq, drug_seq, atom_idx):
-    if not prot_seq:
-        return "<p style='color:red'>Please extract or enter a protein sequence first.</p>"
-    if not drug_seq.strip():
-        return "<p style='color:red'>Please enter a drug sequence.</p>"
-    if not drug_seq.strip().startswith("["):
-        conv = smiles_to_selfies(drug_seq.strip())
-        if conv is None:
-            return "<p style='color:red'>SMILES→SELFIES conversion failed.</p>"
-        drug_seq = conv
-    loader = DataLoader([(prot_seq, drug_seq, 1)], batch_size=1, collate_fn=collate_fn)
-    feats = get_case_feature(encoding, loader)
-    model = FusionDTI(446, 768, args).to(DEVICE)
-    ckpt = os.path.join(f"{args.save_path_prefix}{args.dataset}_{args.fusion}", "best_model.ckpt")
-    if os.path.isfile(ckpt):
-        model.load_state_dict(torch.load(ckpt, map_location=DEVICE))
-    return visualize_attention(model, feats, int(atom_idx)-1 if atom_idx else None)
-def clear_cb():
-    return None, "", "", None, ""
-# ───── Gradio Interface Definition ───────────────────────────────
-css = """
-:root {
-  --bg: #f3f4f6;
-  --card: #ffffff;
-  --border: #e5e7eb;
-  --primary: #6366f1;
-  --primary-dark: #4f46e5;
-  --text: #111827;
-}
-* { box-sizing: border-box; margin: 0; padding: 0; }
-body { background: var(--bg); color: var(--text); font-family: Inter,system-ui,Arial,sans-serif; }
-h1 { font-family: Poppins,Inter,sans-serif; font-weight: 600; font-size: 2rem; text-align: center; margin: 24px 0; }
-button, .gr-button { font-family: Inter,sans-serif; font-weight: 600; }
-#project-links { text-align: center; margin-bottom: 32px; }
-#project-links .gr-button { margin: 0 8px; min-width: 160px; }
-#project-links .gr-button:nth-child(1) { background: #10b981; }
-#project-links .gr-button:nth-child(2) { background: #ef4444; }
-#project-links .gr-button:nth-child(3) { background: #3b82f6; }
-#project-links .gr-button:hover { opacity: 0.9; }
-.link-btn{display:inline-block;margin:0 8px;padding:10px 20px;border-radius:8px;
-         color:white;font-weight:600;text-decoration:none;box-shadow:0 2px 6px rgba(0,0,0,0.12);
-         transition:all .2s ease-in-out;}
-.link-btn:hover{opacity:.9;}
-.link-btn.project{background:linear-gradient(to right,#10b981,#059669);}
-.link-btn.arxiv  {background:linear-gradient(to right,#ef4444,#dc2626);}
-.link-btn.github {background:linear-gradient(to right,#3b82f6,#2563eb);}
-/* make *all* gradio buttons a bit taller */
-.gr-button { min-height: 10px !important; }
-/* now target just our two big action buttons */
-#extract-btn, #inference-btn {
-    width: 5px !important;
-    min-height: 36px !important;
-    margin-top: 12px !important;
-}
-/* and make clear button full width but shorter */
-#clear-btn {
-    width: 10px  !important;
-    min-height: 36px !important;
-    margin-top: 12px !important;
-}
-#input-card label {
-    font-weight: 600 !important;    /* make the text bold */
-    color: var(--text) !important;  /* use your standard text color */
-}
-.card {
-  background: var(--card);
-  border: 1px solid var(--border);
-  border-radius: 12px;
-  padding: 24px;
-  max-width: 1000px;
-  margin: 0 auto 32px;
-  box-shadow: 0 2px 6px rgba(0,0,0,0.05);
-}
-#guidelines-card h2 {
-  font-size: 1.4rem;
-  margin-bottom: 16px;
-  text-align: center;
-}
-#guidelines-card ol {
-  margin-left: 20px;
-  line-height: 1.6;
-  font-size: 1rem;
-}
-#input-card .gr-row, #input-card .gr-cols {
-  gap: 16px;
-}
-#input-card .gr-button {
-  flex: 1;
-}
-#output-card {
-  padding-top: 0;
-}
-"""
-with gr.Blocks(css=css) as demo:
-    # ───────────── Title ─────────────
-    gr.Markdown("<h1>Token-level Visualiser for Drug-Target Interaction</h1>")
-    # ───────────── Project Links ─────────────
-    gr.Markdown("""
-        <div style="text-align:center;margin-bottom:32px;">
-          <a class="link-btn project" href="https://zhaohanm.github.io/FusionDTI.github.io/" target="_blank">🌐 Project Page</a>
-          <a class="link-btn arxiv"   href="https://arxiv.org/abs/2406.01651"        target="_blank">📄 ArXiv: 2406.01651</a>
-          <a class="link-btn github"  href="https://github.com/ZhaohanM/FusionDTI"    target="_blank">💻 GitHub Repo</a>
-        </div>
-        """)
-    # ───────────── Guidelines Card ─────────────
-    gr.HTML(
-        """
-        <div class="card" style="margin-bottom:24px">
-          <h2 style="font-size:1.2rem;margin-bottom:14px">Guidelines for User</h2>
-          <ul style="font-size:1rem; margin-left:18px;line-height:1.55;list-style:decimal;">
-            <li><strong>Convert protein structure into a structure-aware sequence:</strong>
-                Upload a <code>.pdb</code> or <code>.cif</code> file. A structure-aware
-                sequence will be generated using
-                <a href="https://github.com/steineggerlab/foldseek" target="_blank">Foldseek</a>,
-                based on 3D structures from
-                <a href="https://alphafold.ebi.ac.uk" target="_blank">AlphaFold&nbsp;DB</a> or the
-                <a href="https://www.rcsb.org" target="_blank">Protein Data Bank (PDB)</a>.</li>
-            <li><strong>If you only have an amino acid sequence or a UniProt ID,</strong>
-                you must first visit the
-                <a href="https://www.rcsb.org" target="_blank">Protein Data Bank (PDB)</a>
-                or <a href="https://alphafold.ebi.ac.uk" target="_blank">AlphaFold&nbsp;DB</a>
-                to search and download the corresponding <code>.cif</code> or <code>.pdb</code> file.</li>
-            <li><strong>Drug input supports both SELFIES and SMILES:</strong><br>
-                You can enter a SELFIES string directly, or paste a SMILES string.
-                SMILES will be automatically converted to SELFIES using
-                <a href="https://github.com/aspuru-guzik-group/selfies" target="_blank">SELFIES encoder</a>.
-                If conversion fails, a red error message will be displayed.</li>
-            <li>Optionally enter a <strong>1-based</strong> drug atom or substructure index
-                to highlight the Top-30 interacting protein residues.</li>
-            <li>After inference, you can use the
-                “Download PDF” link to export a high-resolution vector version.</li>
-          </ul>
-        </div>
-        """)
-    # ───────────── Input Card ─────────────
-    with gr.Column(elem_id="input-card", elem_classes="card"):
-        protein_seq = gr.Textbox(
-            label="Protein Structure-aware Sequence",
-            lines=3,
-            elem_id="protein-seq"
-        )
-        drug_seq = gr.Textbox(
-            label="Drug Sequence (SELFIES/SMILES)",
-            lines=3,
-            elem_id="drug-seq"
-        )
-        structure_file = gr.File(
-            label="Upload Protein Structure (.pdb/.cif)",
-            file_types=[".pdb", ".cif"],
-            elem_id="structure-file"
-        )
-        drug_idx = gr.Textbox(
-            label="Drug atom index (1-based)",
-            lines=1,
-            elem_id="drug-idx"
-        )
-    # ───────────── Action Buttons ─────────────
-    with gr.Row(elem_id="action-buttons", equal_height=True):
-        btn_extract = gr.Button(
-            "Extract sequence",
-            variant="primary",
-            elem_id="extract-btn"
-        )
-        btn_infer = gr.Button(
-            "Inference",
-            variant="primary",
-            elem_id="inference-btn"
-        )
-    with gr.Row():
-        clear_btn = gr.Button(
-            "Clear",
-            variant="secondary",
-            elem_classes="full-width",
-            elem_id="clear-btn"
-        )
-    # ───────────── Output Visualization ─────────────
-    output_html  = gr.HTML(elem_id="result-html")
-    # ───────────── Event Wiring ─────────────
-    btn_extract.click(
-        fn=extract_sequence_cb,
-        inputs=[structure_file],
-        outputs=[protein_seq]
-    )
-    btn_infer.click(
-        fn=inference_cb,
-        inputs=[protein_seq, drug_seq, drug_idx],
-        outputs=[output_html]
-    )
-    clear_btn.click(
-        fn=lambda: ("", "", "", ""),
-        inputs=[],
-        outputs=[protein_seq, drug_seq, drug_idx, output_html]
-    )
 if __name__ == "__main__":
-    demo.launch(share=True)

+import os, sys, argparse, tempfile, shutil, base64, io
+from flask import Flask, request, render_template_string
+from werkzeug.utils import secure_filename
+from torch.utils.data import DataLoader
 import selfies
 from rdkit import Chem
+import app as gr
+import torch
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
 from matplotlib import cm
 from typing import Optional
 from utils.drug_tokenizer import DrugTokenizer
+from transformers import EsmForMaskedLM, EsmTokenizer, AutoModel
 from utils.metric_learning_models_att_maps import Pre_encoded, FusionDTI
 from utils.foldseek_util import get_struc_seq
+# ───── global paths / args ──────────────────────────────────────
+# Path of the "foldseek" executable found on PATH; shutil.which returns None
+# when there is no such executable, so later callers may receive None here.
+FOLDSEEK_BIN = shutil.which("foldseek")
+# Set the tokenizers parallelism switch to "false" for this process.
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+# Make the parent directory importable.
+sys.path.append("..")
 def parse_config():
     p = argparse.ArgumentParser()
+    p.add_argument("-f")
     p.add_argument("--prot_encoder_path", default="westlake-repl/SaProt_650M_AF2")
     p.add_argument("--drug_encoder_path", default="HUBioDataLab/SELFormer")
+    p.add_argument("--agg_mode", default="mean_all_tok", type=str, help="{cls|mean|mean_all_tok}")
     p.add_argument("--group_size", type=int, default=1)
+    p.add_argument("--lr", type=float, default=1e-4)
     p.add_argument("--fusion", default="CAN")
     p.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
     p.add_argument("--save_path_prefix", default="save_model_ckp/")
+# Parse the configuration once at import time; everything below runs as a
+# module-level side effect.
 args = parse_config()
 DEVICE = args.device
+# ───── tokenisers & encoders ────────────────────────────────────
+# Protein tokenizer/model and drug model are loaded from the paths given in
+# args (prot_encoder_path / drug_encoder_path).
 prot_tokenizer = EsmTokenizer.from_pretrained(args.prot_encoder_path)
 prot_model     = EsmForMaskedLM.from_pretrained(args.prot_encoder_path)
+drug_tokenizer = DrugTokenizer()        # SELFIES
 drug_model     = AutoModel.from_pretrained(args.drug_encoder_path)
+# Wrap both encoders in Pre_encoded and move the wrapper to DEVICE.
+encoding = Pre_encoded(prot_model, drug_model, args).to(DEVICE)
+# ─── collate fn ────────────────────────────────────────────────
 def collate_fn(batch):
     query1, query2, scores = zip(*batch)
     attention_mask2 = query_encodings2["attention_mask"].bool()
     return query_encodings1["input_ids"], attention_mask1, query_encodings2["input_ids"], attention_mask2, scores
+# def collate_fn_batch_encoding(batch):
+def smiles_to_selfies(smiles: str) -> Optional[str]:
+    try:
+        mol = Chem.MolFromSmiles(smiles)
+        if mol is None:
+            return None
+        selfies_str = selfies.encoder(smiles)
+        return selfies_str
+    except Exception:
+        return None
+# ───── single-case embedding ───────────────────────────────────
 def get_case_feature(model, loader):
     model.eval()
     with torch.no_grad():
                      p_ids.cpu(), d_ids.cpu(),
                      p_mask.cpu(), d_mask.cpu(), None)]
+# ───── helper：过滤特殊 token ───────────────────────────────────
+def clean_tokens(ids, tokenizer):
+    toks = tokenizer.convert_ids_to_tokens(ids.tolist())
+    return [t for t in toks if t not in tokenizer.all_special_tokens]
+# ───── visualisation ───────────────────────────────────────────
 def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
     """
     Render a Protein → Drug cross-attention heat-map and, optionally, a
+    Top-20 protein-residue table for a chosen drug-token index.
     The token index shown on the x-axis (and accepted via *drug_idx*) is **the
     position of that token in the *original* drug sequence**, *after* the
         plt.close(fig)
         html = f'<img src="data:image/png;base64,{base64.b64encode(buf.getvalue()).decode()}" />'
+        # ───────────────────── 生成 Top-20 表（若需要） ─────────────────────
+        table_html = ""                   # 先设空串，方便后面统一拼接
         if drug_idx is not None:
             # map original 0-based drug_idx → current column position
             if (drug_idx + 1) in d_indices:
             if col_pos is not None:
                 col_vec = attn[:, col_pos]
+                topk    = torch.topk(col_vec, k=min(20, len(col_vec))).indices.tolist()
                 rank_hdr = "".join(f"<th>{r+1}</th>"         for r in range(len(topk)))
                 res_row  = "".join(f"<td>{p_tokens[i]}</td>" for i in topk)
                 drug_tok_text = d_tokens[col_pos]
                 orig_idx      = d_indices[col_pos]
+                table_html = (
+                    f"<h4 style='margin-bottom:6px'>"
+                    f"Drug token #{orig_idx} <code>{drug_tok_text}</code> "
+                    f"→ Top-20 Protein residues</h4>"
+                    "<table class='tg' style='margin-bottom:8px'>"
+                    f"<tr><th>Rank</th>{rank_hdr}</tr>"
+                    f"<tr><td>Residue</td>{res_row}</tr>"
+                    f"<tr><td>Position</td>{pos_row}</tr>"
+                    "</table>")
+        # ────────────────── 生成可放大 + 可下载的热图 ────────────────────
         buf_png = io.BytesIO()
+        fig.savefig(buf_png, format="png", dpi=140)   # 预览（光栅）
         buf_png.seek(0)
         buf_pdf = io.BytesIO()
+        fig.savefig(buf_pdf, format="pdf")            # 高清下载（矢量）
         buf_pdf.seek(0)
         plt.close(fig)
         pdf_b64 = base64.b64encode(buf_pdf.getvalue()).decode()
         html_heat = (
+            f"<a href='data:image/png;base64,{png_b64}' target='_blank' "
+            f"title='Click to enlarge'>"
+            f"<img src='data:image/png;base64,{png_b64}' "
+            f"style='max-width:100%;height:auto;cursor:zoom-in' /></a>"
+            f"<div style='margin-top:6px'>"
+            f"<a href='data:application/pdf;base64,{pdf_b64}' "
+            f"download='attention_heatmap.pdf'>Download PDF</a></div>"
         )
+        # ───────────────────────── 返回最终 HTML ─────────────────────────
         return table_html + html_heat
+def inference(protein_seq, drug_seq, drug_idx, structure_file):
+    """Extract a structure-aware sequence from an uploaded structure file.
+
+    Returns a red HTML error string when *structure_file* is ``None``, when
+    ``structure_file.name`` does not exist on disk, or when the Foldseek call
+    raises.
+
+    NOTE(review): on success the function falls off the end and returns
+    ``None``; the extracted sequence only overwrites the local *protein_seq*.
+    *drug_seq* and *drug_idx* are never read. This looks like an unfinished
+    port of the Gradio callback -- confirm whether it is still needed.
+    """
+    # -- this part was switched to the Gradio way of getting the file path --
+    if structure_file is not None and os.path.exists(structure_file.name):
+        tmp_structure_path = structure_file.name
+    else:
+        return "<p style='color:red'>请先上传一个有效的 .pdb 或 .cif 文件。</p>"
+    # Call foldseek (chain "A" only) and keep the third field of the first entry.
+    try:
+        parsed = get_struc_seq(FOLDSEEK_BIN, tmp_structure_path, ["A"], plddt_mask=False)
+        chain = next(iter(parsed))
+        protein_seq = parsed[chain][2]
+    except Exception as e:
+        return f"<p style='color:red'>Foldseek 提取失败：{e}</p>"
+# ───── Flask app ───────────────────────────────────────────────
+app = Flask(__name__)
+@app.route("/", methods=["GET", "POST"])
+def index():
+    protein_seq = drug_seq = structure_seq = ""; result_html = None
+    tmp_structure_path = ""; drug_idx = None
+    if request.method == "POST":
+        drug_idx_raw = request.form.get("drug_idx", "")
+        drug_idx = int(drug_idx_raw)-1 if drug_idx_raw.isdigit() else None
+        struct = request.files.get("structure_file")
+        if struct and struct.filename:
+            tmp_dir = tempfile.mkdtemp(prefix="foldseek_")
+            safe_name = secure_filename(struct.filename)
+            tmp_structure_path = os.path.join(tmp_dir, safe_name)
+            struct.save(tmp_structure_path)
+        else:
+            tmp_structure_path = request.form.get("tmp_structure_path", "")
+        if "clear" in request.form:
+            protein_seq = drug_seq = structure_seq = ""; tmp_structure_path = ""
+        elif "confirm_structure" in request.form and tmp_structure_path:
+            try:
+                parsed_seqs = get_struc_seq(FOLDSEEK_BIN, tmp_structure_path, ["A"], plddt_mask=False)["A"]
+                seq, foldseek_seq, structure_seq = parsed_seqs        # 用完后清除目录
+            except Exception as e:
+                result_html = (
+                    "<p style='color:red'><strong>Foldseek failed to extract sequence "
+                    f"from structure: {e}</strong></p>")
+                structure_seq = ""
+            protein_seq = structure_seq
+            drug_input = request.form.get("drug_sequence", "")
+            # Heuristically check if input is SMILES (not starting with [) and convert
+            if not drug_input.strip().startswith("["):
+                converted = smiles_to_selfies(drug_input.strip())
+                if converted:
+                    drug_seq = converted
+                else:
+                    drug_seq = ""
+                    result_html = "<p style='color:red'><strong>Failed to convert SMILES to SELFIES. Please check the input string.</strong></p>"
+            else:
+                drug_seq = drug_input
+        elif "Inference" in request.form:
+            protein_seq = request.form.get("protein_sequence", "")
+            drug_seq    = request.form.get("drug_sequence", "")
+            if protein_seq and drug_seq:
+                loader = DataLoader([(protein_seq, drug_seq, 1)], batch_size=1,
+                                    collate_fn=collate_fn)
+                feats  = get_case_feature(encoding, loader)
+                model  = FusionDTI(446, 768, args).to(DEVICE)
+                ckpt   = os.path.join(f"{args.save_path_prefix}{args.dataset}_{args.fusion}",
+                                      "best_model.ckpt")
+                if os.path.isfile(ckpt):
+                    model.load_state_dict(torch.load(ckpt, map_location=DEVICE))
+                result_html = visualize_attention(model, feats, drug_idx)
+    return render_template_string(
+    # ───────────── HTML (原 UI + 新输入框) ─────────────
+    """
+<!doctype html>
+<html lang="en"><head><meta charset="utf-8"><title>FusionDTI </title>
+<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=Poppins:wght@500;600&display=swap" rel="stylesheet">
+<style>
+:root{--bg:#f3f4f6;--card:#fff;--primary:#6366f1;--primary-dark:#4f46e5;--text:#111827;--border:#e5e7eb;}
+*{box-sizing:border-box;margin:0;padding:0}
+body{background:var(--bg);color:var(--text);font-family:Inter,system-ui,Arial,sans-serif;line-height:1.5;padding:32px 12px;}
+h1{font-family:Poppins,Inter,sans-serif;font-weight:600;font-size:1.7rem;text-align:center;margin-bottom:28px;letter-spacing:-.2px;}
+.card{max-width:1000px;margin:0 auto;background:var(--card);border:1px solid var(--border);
+      border-radius:12px;box-shadow:0 2px 6px rgba(0,0,0,.05);padding:32px 36px;}
+label{font-weight:500;margin-bottom:6px;display:block}
+textarea,input[type=file]{width:100%;font-size:.9rem;font-family:monospace;padding:10px 12px;
+      border:1px solid var(--border);border-radius:8px;background:#fff;resize:vertical;}
+textarea{min-height:90px}
+.btn{appearance:none;border:none;cursor:pointer;padding:12px 22px;border-radius:8px;font-weight:500;
+     font-family:Inter,sans-serif;transition:all .18s ease;color:#fff;}
+.btn-primary{background:var(--primary)}.btn-primary:hover{background:var(--primary-dark)}
+.btn-neutral{background:#9ca3af;}.btn-neutral:hover{background:#6b7280}
+.grid{display:grid;gap:22px}.grid-2{grid-template-columns:1fr 1fr}
+.vis-box{margin-top:28px;border:1px solid var(--border);border-radius:10px;overflow:auto;max-height:72vh;}
+pre{white-space:pre-wrap;word-break:break-all;font-family:monospace;margin-top:8px}
+/* ── tidy table for Top-20 list ─────────────────────────────── */
+table.tg{border-collapse:collapse;margin-top:4px;font-size:0.83rem}
+table.tg th,table.tg td{border:1px solid var(--border);padding:6px 8px;text-align:left}
+table.tg th{background:var(--bg);font-weight:600}
+</style>
+</head>
+<body>
+<h1> Token-level Visualiser for Drug-Target Interaction</h1>
+<!-- ───────────── Project Links (larger + spaced) ───────────── -->
+<div style="margin-top:24px; text-align:center;">
+  <a href="https://zhaohanm.github.io/FusionDTI.github.io/" target="_blank"
+     style="display:inline-block;margin:8px 18px;padding:10px 20px;
+            background:linear-gradient(to right,#10b981,#059669);color:white;
+            font-weight:600;border-radius:8px;font-size:0.9rem;
+            font-family:Inter,sans-serif;text-decoration:none;
+            box-shadow:0 2px 6px rgba(0,0,0,0.12);transition:all 0.2s ease-in-out;"
+     onmouseover="this.style.opacity='0.9'" onmouseout="this.style.opacity='1'">
+    🌐 Project Page
+  </a>
+  <a href="https://arxiv.org/abs/2406.01651" target="_blank"
+     style="display:inline-block;margin:8px 18px;padding:10px 20px;
+            background:linear-gradient(to right,#ef4444,#dc2626);color:white;
+            font-weight:600;border-radius:8px;font-size:0.9rem;
+            font-family:Inter,sans-serif;text-decoration:none;
+            box-shadow:0 2px 6px rgba(0,0,0,0.12);transition:all 0.2s ease-in-out;"
+     onmouseover="this.style.opacity='0.9'" onmouseout="this.style.opacity='1'">
+    📄 ArXiv: 2406.01651
+  </a>
+  <a href="https://github.com/ZhaohanM/FusionDTI" target="_blank"
+     style="display:inline-block;margin:8px 18px;padding:10px 20px;
+            background:linear-gradient(to right,#3b82f6,#2563eb);color:white;
+            font-weight:600;border-radius:8px;font-size:0.9rem;
+            font-family:Inter,sans-serif;text-decoration:none;
+            box-shadow:0 2px 6px rgba(0,0,0,0.12);transition:all 0.2s ease-in-out;"
+     onmouseover="this.style.opacity='0.9'" onmouseout="this.style.opacity='1'">
+    💻 GitHub Repo
+  </a>
+</div>
+<!-- ─────────────  Guidelines for Use  ───────────── -->
+<div class="card" style="margin-bottom:24px">
+  <h2 style="font-size:1.2rem;margin-bottom:14px">Guidelines for Use</h2>
+  <ul style="margin-left:18px;line-height:1.55;list-style:decimal;">
+    <li><strong>Convert protein structure into a structure-aware sequence:</strong>
+        Upload a <code>.pdb</code> or <code>.cif</code> file. A structure-aware
+        sequence will be generated using
+        <a href="https://github.com/steineggerlab/foldseek" target="_blank">Foldseek</a>,
+        based on 3D structures from
+        <a href="https://alphafold.ebi.ac.uk" target="_blank">AlphaFold&nbsp;DB</a> or the
+        <a href="https://www.rcsb.org" target="_blank">Protein Data Bank (PDB)</a>.</li>
+    <li><strong>If you only have an amino acid sequence or a UniProt ID,</strong>
+        you must first visit the
+        <a href="https://www.rcsb.org" target="_blank">Protein Data Bank (PDB)</a>
+        or <a href="https://alphafold.ebi.ac.uk" target="_blank">AlphaFold&nbsp;DB</a>
+        to search and download the corresponding <code>.cif</code> or <code>.pdb</code> file.</li>
+    <li><strong>Drug input supports both SELFIES and SMILES:</strong><br>
+        You can enter a SELFIES string directly, or paste a SMILES string.
+        SMILES will be automatically converted to SELFIES using
+        <a href="https://github.com/aspuru-guzik-group/selfies" target="_blank">SELFIES encoder</a>.
+        If conversion fails, a red error message will be displayed.</li>
+    <li>Optionally enter a <strong>1-based</strong> drug atom or substructure index
+        to highlight the Top-10 interacting protein residues.</li>
+    <li>After inference, you can use the
+        “Download PDF” link to export a high-resolution vector version.</li>
+  </ul>
+</div>
+<div class="card">
+<form method="POST" enctype="multipart/form-data" class="grid">
+  <div><label>Protein Structure (.pdb / .cif)</label>
+       <input type="file" name="structure_file">
+       <input type="hidden" name="tmp_structure_path" value="{{ tmp_structure_path }}"></div>
+  <div><label>Protein Sequence</label>
+       <textarea name="protein_sequence" placeholder="Confirm / paste sequence…">{{ protein_seq }}</textarea></div>
+  <div><label>Drug Sequence (SELFIES/SMILES)</label>
+       <textarea name="drug_sequence" placeholder="[C][C][O]/cco …">{{ drug_seq }}</textarea></div>
+    <label>Drug atom/substructure index (1-based) – show Top-10 related protein residue</label>
+        <input type="number" name="drug_idx" min="1" style="width:120px">
+  <div class="grid grid-2">
+    <button class="btn btn-primary" type="Inference" name="confirm_structure">Confirm Structure</button>
+    <button class="btn btn-primary" type="Inference" name="Inference">Inference</button>
+  </div>
+  <button class="btn btn-neutral" style="width:100%" type="Inference" name="clear">Clear</button>
+</form>
+{% if structure_seq %}
+  <div style="margin-top:18px"><strong>Structure-aware sequence:</strong><pre>{{ structure_seq }}</pre></div>
+{% endif %}
+{% if result_html %}
+  <div class="vis-box" style="margin-top:26px">{{ result_html|safe }}</div>
+{% endif %}
+</div></body></html>
+    """,
+    protein_seq=protein_seq, drug_seq=drug_seq, structure_seq=structure_seq,
+    result_html=result_html, tmp_structure_path=tmp_structure_path)
+# ───── run ─────────────────────────────────────────────────────
 if __name__ == "__main__":
+    app.run(debug=True, host="0.0.0.0", port=7860)