Zhaohan Meng committed on
Commit
44cf989
·
verified ·
1 Parent(s): cff7f27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +277 -361
app.py CHANGED
@@ -1,81 +1,36 @@
1
- # ─── monkey-patch gradio_client so bool schemas don’t crash json_schema_to_python_type ───
2
- import gradio_client.utils as _gc_utils
3
-
4
- # back up originals
5
- _orig_get_type = _gc_utils.get_type
6
- _orig_json2py = _gc_utils._json_schema_to_python_type
7
-
8
- def _patched_get_type(schema):
9
- # treat any boolean schema as if it were an empty dict
10
- if isinstance(schema, bool):
11
- schema = {}
12
- return _orig_get_type(schema)
13
-
14
- def _patched_json_schema_to_python_type(schema, defs=None):
15
- # treat any boolean schema as if it were an empty dict
16
- if isinstance(schema, bool):
17
- schema = {}
18
- return _orig_json2py(schema, defs)
19
-
20
- _gc_utils.get_type = _patched_get_type
21
- _gc_utils._json_schema_to_python_type = _patched_json_schema_to_python_type
22
-
23
- # ─── now it’s safe to import Gradio and build your interface ───────────────────────────
24
- import gradio as gr
25
- import os
26
- import sys
27
- import argparse
28
- import tempfile
29
- import shutil
30
- import base64
31
- import io
32
-
33
- import torch
34
  import selfies
35
  from rdkit import Chem
 
 
 
36
  import matplotlib
37
  matplotlib.use("Agg")
38
  import matplotlib.pyplot as plt
39
  from matplotlib import cm
40
  from typing import Optional
41
 
42
- from transformers import EsmForMaskedLM, EsmTokenizer, AutoModel
43
- from torch.utils.data import DataLoader
44
- from Bio.PDB import PDBParser, MMCIFParser
45
- from Bio.Data import IUPACData
46
-
47
  from utils.drug_tokenizer import DrugTokenizer
 
48
  from utils.metric_learning_models_att_maps import Pre_encoded, FusionDTI
49
  from utils.foldseek_util import get_struc_seq
50
 
51
- # ───── Helpers ─────────────────────────────────────────────────
52
-
53
- three2one = {k.upper(): v for k, v in IUPACData.protein_letters_3to1.items()}
54
- three2one.update({"MSE": "M", "SEC": "C", "PYL": "K"})
55
- def simple_seq_from_structure(path: str) -> str:
56
- parser = MMCIFParser(QUIET=True) if path.endswith(".cif") else PDBParser(QUIET=True)
57
- structure = parser.get_structure("P", path)
58
- chains = list(structure.get_chains())
59
- if not chains:
60
- return ""
61
- chain = max(chains, key=lambda c: len(list(c.get_residues())))
62
- return "".join(three2one.get(res.get_resname().upper(), "X") for res in chain)
63
-
64
- def smiles_to_selfies(smiles: str) -> Optional[str]:
65
- try:
66
- mol = Chem.MolFromSmiles(smiles)
67
- if mol is None:
68
- return None
69
- return selfies.encoder(smiles)
70
- except Exception:
71
- return None
72
 
73
  def parse_config():
74
  p = argparse.ArgumentParser()
 
75
  p.add_argument("--prot_encoder_path", default="westlake-repl/SaProt_650M_AF2")
76
  p.add_argument("--drug_encoder_path", default="HUBioDataLab/SELFormer")
77
- p.add_argument("--agg_mode", type=str, default="mean_all_tok")
78
  p.add_argument("--group_size", type=int, default=1)
 
79
  p.add_argument("--fusion", default="CAN")
80
  p.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
81
  p.add_argument("--save_path_prefix", default="save_model_ckp/")
@@ -85,13 +40,16 @@ def parse_config():
85
  args = parse_config()
86
  DEVICE = args.device
87
 
88
- # ───── Load models & tokenizers ─────────────────────────────────
89
  prot_tokenizer = EsmTokenizer.from_pretrained(args.prot_encoder_path)
90
  prot_model = EsmForMaskedLM.from_pretrained(args.prot_encoder_path)
91
- drug_tokenizer = DrugTokenizer()
 
92
  drug_model = AutoModel.from_pretrained(args.drug_encoder_path)
93
- encoding = Pre_encoded(prot_model, drug_model, args).to(DEVICE)
94
 
 
 
 
95
  def collate_fn(batch):
96
  query1, query2, scores = zip(*batch)
97
 
@@ -117,8 +75,20 @@ def collate_fn(batch):
117
  attention_mask2 = query_encodings2["attention_mask"].bool()
118
 
119
  return query_encodings1["input_ids"], attention_mask1, query_encodings2["input_ids"], attention_mask2, scores
 
 
 
 
 
 
 
 
 
 
 
120
 
121
 
 
122
  def get_case_feature(model, loader):
123
  model.eval()
124
  with torch.no_grad():
@@ -130,12 +100,17 @@ def get_case_feature(model, loader):
130
  p_ids.cpu(), d_ids.cpu(),
131
  p_mask.cpu(), d_mask.cpu(), None)]
132
 
133
-
134
- # ─────────────── visualisation ───────────────────────────────────────────
 
 
 
 
 
135
  def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
136
  """
137
  Render a Protein → Drug cross-attention heat-map and, optionally, a
138
- Top-30 protein-residue table for a chosen drug-token index.
139
 
140
  The token index shown on the x-axis (and accepted via *drug_idx*) is **the
141
  position of that token in the *original* drug sequence**, *after* the
@@ -234,8 +209,8 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
234
  plt.close(fig)
235
  html = f'<img src="data:image/png;base64,{base64.b64encode(buf.getvalue()).decode()}" />'
236
 
237
- # ───────────────────── Top-30 table ─────────────────────
238
- table_html = ""
239
  if drug_idx is not None:
240
  # map original 0-based drug_idx → current column position
241
  if (drug_idx + 1) in d_indices:
@@ -247,7 +222,7 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
247
 
248
  if col_pos is not None:
249
  col_vec = attn[:, col_pos]
250
- topk = torch.topk(col_vec, k=min(30, len(col_vec))).indices.tolist()
251
 
252
  rank_hdr = "".join(f"<th>{r+1}</th>" for r in range(len(topk)))
253
  res_row = "".join(f"<td>{p_tokens[i]}</td>" for i in topk)
@@ -255,58 +230,24 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
255
 
256
  drug_tok_text = d_tokens[col_pos]
257
  orig_idx = d_indices[col_pos]
258
-
259
- # 1) build the header row: leading “Rank”, then 1…30
260
- header_cells = (
261
- "<th style='border:1px solid #ccc; padding:6px; "
262
- "background:#f7f7f7; text-align:center;'>Rank</th>"
263
- + "".join(
264
- f"<th style='border:1px solid #ccc; padding:6px; "
265
- f"background:#f7f7f7; text-align:center'>{r+1}</th>"
266
- for r in range(len(topk))
267
- )
268
- )
269
-
270
- # 2) build the residue row: leading “Residue”, then the residue tokens
271
- residue_cells = (
272
- "<th style='border:1px solid #ccc; padding:6px; "
273
- "background:#f7f7f7; text-align:center;'>Residue</th>"
274
- + "".join(
275
- f"<td style='border:1px solid #ccc; padding:6px; "
276
- f"text-align:center'>{p_tokens[i]}</td>"
277
- for i in topk
278
- )
279
- )
280
-
281
- # 3) build the position row: leading “Position”, then the residue positions
282
- position_cells = (
283
- "<th style='border:1px solid #ccc; padding:6px; "
284
- "background:#f7f7f7; text-align:center;'>Position</th>"
285
- + "".join(
286
- f"<td style='border:1px solid #ccc; padding:6px; "
287
- f"text-align:center'>{p_indices[i]}</td>"
288
- for i in topk
289
- )
290
- )
291
-
292
- # 4) assemble your table_html
293
- table_html = (
294
- f"<h4 style='margin-bottom:12px'>"
295
- f"Drug atom #{orig_idx} <code>{drug_tok_text}</code> โ†’ Top-30 Protein residues"
296
- f"</h4>"
297
- f"<table style='border-collapse:collapse; margin:0 auto 24px;'>"
298
- f"<tr>{header_cells}</tr>"
299
- f"<tr>{residue_cells}</tr>"
300
- f"<tr>{position_cells}</tr>"
301
- f"</table>"
302
- )
303
 
 
 
 
 
 
 
 
 
 
 
 
304
  buf_png = io.BytesIO()
305
- fig.savefig(buf_png, format="png", dpi=140)
306
  buf_png.seek(0)
307
 
308
  buf_pdf = io.BytesIO()
309
- fig.savefig(buf_pdf, format="pdf")
310
  buf_pdf.seek(0)
311
  plt.close(fig)
312
 
@@ -314,253 +255,228 @@ def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
314
  pdf_b64 = base64.b64encode(buf_pdf.getvalue()).decode()
315
 
316
  html_heat = (
317
- f"<div style='position: relative; width: 100%;'>"
318
- # the PDF button, absolutely positioned
319
- f"<a href='data:application/pdf;base64,{pdf_b64}' download='attention_heatmap.pdf' "
320
- "style='position: absolute; top: 12px; right: 12px; "
321
- "background: var(--primary); color: #fff; "
322
- "padding: 8px 16px; border-radius: 6px; "
323
- "font-size: 0.9rem; font-weight: 500; "
324
- "text-decoration: none;'>"
325
- "Download PDF"
326
- "</a>"
327
- # the clickable heatโ€map image
328
- f"<a href='data:image/png;base64,{png_b64}' target='_blank' title='Click to enlarge'>"
329
- f"<img src='data:image/png;base64,{png_b64}' "
330
- "style='display: block; width: 100%; height: auto; cursor: zoom-in;'/>"
331
- "</a>"
332
- "</div>"
333
  )
334
 
 
335
  return table_html + html_heat
336
-
337
- # ───── Gradio Callbacks ─────────────────────────────────────────
338
-
339
- ROOT = os.path.dirname(os.path.abspath(__file__))
340
- FOLDSEEK_BIN = os.path.join(ROOT, "bin", "foldseek")
341
-
342
- def extract_sequence_cb(structure_file):
343
- if structure_file is None or not os.path.exists(structure_file.name):
344
- return ""
345
- parsed = get_struc_seq(FOLDSEEK_BIN, structure_file.name, None, plddt_mask=False)
346
- first_chain = next(iter(parsed))
347
- _, _, struct_seq = parsed[first_chain]
348
- return struct_seq
349
-
350
- def inference_cb(prot_seq, drug_seq, atom_idx):
351
- if not prot_seq:
352
- return "<p style='color:red'>Please extract or enter a protein sequence first.</p>"
353
- if not drug_seq.strip():
354
- return "<p style='color:red'>Please enter a drug sequence.</p>"
355
- if not drug_seq.strip().startswith("["):
356
- conv = smiles_to_selfies(drug_seq.strip())
357
- if conv is None:
358
- return "<p style='color:red'>SMILESโ†’SELFIES conversion failed.</p>"
359
- drug_seq = conv
360
- loader = DataLoader([(prot_seq, drug_seq, 1)], batch_size=1, collate_fn=collate_fn)
361
- feats = get_case_feature(encoding, loader)
362
- model = FusionDTI(446, 768, args).to(DEVICE)
363
- ckpt = os.path.join(f"{args.save_path_prefix}{args.dataset}_{args.fusion}", "best_model.ckpt")
364
- if os.path.isfile(ckpt):
365
- model.load_state_dict(torch.load(ckpt, map_location=DEVICE))
366
- return visualize_attention(model, feats, int(atom_idx)-1 if atom_idx else None)
367
-
368
- def clear_cb():
369
- return None, "", "", None, ""
370
-
371
- # ───── Gradio Interface Definition ───────────────────────────────
372
-
373
- css = """
374
- :root {
375
- --bg: #f3f4f6;
376
- --card: #ffffff;
377
- --border: #e5e7eb;
378
- --primary: #6366f1;
379
- --primary-dark: #4f46e5;
380
- --text: #111827;
381
- }
382
- * { box-sizing: border-box; margin: 0; padding: 0; }
383
- body { background: var(--bg); color: var(--text); font-family: Inter,system-ui,Arial,sans-serif; }
384
- h1 { font-family: Poppins,Inter,sans-serif; font-weight: 600; font-size: 2rem; text-align: center; margin: 24px 0; }
385
- button, .gr-button { font-family: Inter,sans-serif; font-weight: 600; }
386
- #project-links { text-align: center; margin-bottom: 32px; }
387
- #project-links .gr-button { margin: 0 8px; min-width: 160px; }
388
- #project-links .gr-button:nth-child(1) { background: #10b981; }
389
- #project-links .gr-button:nth-child(2) { background: #ef4444; }
390
- #project-links .gr-button:nth-child(3) { background: #3b82f6; }
391
- #project-links .gr-button:hover { opacity: 0.9; }
392
- .link-btn{display:inline-block;margin:0 8px;padding:10px 20px;border-radius:8px;
393
- color:white;font-weight:600;text-decoration:none;box-shadow:0 2px 6px rgba(0,0,0,0.12);
394
- transition:all .2s ease-in-out;}
395
- .link-btn:hover{opacity:.9;}
396
- .link-btn.project{background:linear-gradient(to right,#10b981,#059669);}
397
- .link-btn.arxiv {background:linear-gradient(to right,#ef4444,#dc2626);}
398
- .link-btn.github {background:linear-gradient(to right,#3b82f6,#2563eb);}
399
-
400
- /* make *all* gradio buttons a bit taller */
401
- .gr-button { min-height: 10px !important; }
402
-
403
- /* now target just our two big action buttons */
404
- #extract-btn, #inference-btn {
405
- width: 5px !important;
406
- min-height: 36px !important;
407
- margin-top: 12px !important;
408
- }
409
-
410
- /* and make clear button full width but shorter */
411
- #clear-btn {
412
- width: 10px !important;
413
- min-height: 36px !important;
414
- margin-top: 12px !important;
415
- }
416
-
417
- #input-card label {
418
- font-weight: 600 !important; /* make the text bold */
419
- color: var(--text) !important; /* use your standard text color */
420
- }
421
-
422
- .card {
423
- background: var(--card);
424
- border: 1px solid var(--border);
425
- border-radius: 12px;
426
- padding: 24px;
427
- max-width: 1000px;
428
- margin: 0 auto 32px;
429
- box-shadow: 0 2px 6px rgba(0,0,0,0.05);
430
- }
431
-
432
- #guidelines-card h2 {
433
- font-size: 1.4rem;
434
- margin-bottom: 16px;
435
- text-align: center;
436
- }
437
- #guidelines-card ol {
438
- margin-left: 20px;
439
- line-height: 1.6;
440
- font-size: 1rem;
441
- }
442
- #input-card .gr-row, #input-card .gr-cols {
443
- gap: 16px;
444
- }
445
- #input-card .gr-button {
446
- flex: 1;
447
- }
448
- #output-card {
449
- padding-top: 0;
450
- }
451
- """
452
-
453
- with gr.Blocks(css=css) as demo:
454
- # ───────────── Title ─────────────
455
- gr.Markdown("<h1>Token-level Visualiser for Drug-Target Interaction</h1>")
456
-
457
- # ───────────── Project Links ─────────────
458
- gr.Markdown("""
459
- <div style="text-align:center;margin-bottom:32px;">
460
- <a class="link-btn project" href="https://zhaohanm.github.io/FusionDTI.github.io/" target="_blank">๐ŸŒ Project Page</a>
461
- <a class="link-btn arxiv" href="https://arxiv.org/abs/2406.01651" target="_blank">๐Ÿ“„ ArXiv: 2406.01651</a>
462
- <a class="link-btn github" href="https://github.com/ZhaohanM/FusionDTI" target="_blank">๐Ÿ’ป GitHub Repo</a>
463
- </div>
464
- """)
465
- # ───────────── Guidelines Card ─────────────
466
 
467
- gr.HTML(
468
- """
469
- <div class="card" style="margin-bottom:24px">
470
- <h2 style="font-size:1.2rem;margin-bottom:14px">Guidelines for User</h2>
471
- <ul style="font-size:1rem; margin-left:18px;line-height:1.55;list-style:decimal;">
472
- <li><strong>Convert protein structure into a structure-aware sequence:</strong>
473
- Upload a <code>.pdb</code> or <code>.cif</code> file. A structure-aware
474
- sequence will be generated using
475
- <a href="https://github.com/steineggerlab/foldseek" target="_blank">Foldseek</a>,
476
- based on 3D structures from
477
- <a href="https://alphafold.ebi.ac.uk" target="_blank">AlphaFold&nbsp;DB</a> or the
478
- <a href="https://www.rcsb.org" target="_blank">Protein Data Bank (PDB)</a>.</li>
479
-
480
- <li><strong>If you only have an amino acid sequence or a UniProt ID,</strong>
481
- you must first visit the
482
- <a href="https://www.rcsb.org" target="_blank">Protein Data Bank (PDB)</a>
483
- or <a href="https://alphafold.ebi.ac.uk" target="_blank">AlphaFold&nbsp;DB</a>
484
- to search and download the corresponding <code>.cif</code> or <code>.pdb</code> file.</li>
485
-
486
- <li><strong>Drug input supports both SELFIES and SMILES:</strong><br>
487
- You can enter a SELFIES string directly, or paste a SMILES string.
488
- SMILES will be automatically converted to SELFIES using
489
- <a href="https://github.com/aspuru-guzik-group/selfies" target="_blank">SELFIES encoder</a>.
490
- If conversion fails, a red error message will be displayed.</li>
491
-
492
- <li>Optionally enter a <strong>1-based</strong> drug atom or substructure index
493
- to highlight the Top-30 interacting protein residues.</li>
494
-
495
- <li>After inference, you can use the
496
- โ€œDownload PDFโ€ link to export a high-resolution vector version.</li>
497
- </ul>
498
- </div>
499
- """)
500
-
501
- # ───────────── Input Card ─────────────
502
- with gr.Column(elem_id="input-card", elem_classes="card"):
503
-
504
- protein_seq = gr.Textbox(
505
- label="Protein Structure-aware Sequence",
506
- lines=3,
507
- elem_id="protein-seq"
508
- )
509
- drug_seq = gr.Textbox(
510
- label="Drug Sequence (SELFIES/SMILES)",
511
- lines=3,
512
- elem_id="drug-seq"
513
- )
514
- structure_file = gr.File(
515
- label="Upload Protein Structure (.pdb/.cif)",
516
- file_types=[".pdb", ".cif"],
517
- elem_id="structure-file"
518
- )
519
- drug_idx = gr.Textbox(
520
- label="Drug atom index (1-based)",
521
- lines=1,
522
- elem_id="drug-idx"
523
- )
524
-
525
- # ───────────── Action Buttons ─────────────
526
- with gr.Row(elem_id="action-buttons", equal_height=True):
527
- btn_extract = gr.Button(
528
- "Extract sequence",
529
- variant="primary",
530
- elem_id="extract-btn"
531
- )
532
- btn_infer = gr.Button(
533
- "Inference",
534
- variant="primary",
535
- elem_id="inference-btn"
536
- )
537
- with gr.Row():
538
- clear_btn = gr.Button(
539
- "Clear",
540
- variant="secondary",
541
- elem_classes="full-width",
542
- elem_id="clear-btn"
543
- )
544
-
545
- # ───────────── Output Visualization ─────────────
546
- output_html = gr.HTML(elem_id="result-html")
547
-
548
- # ───────────── Event Wiring ─────────────
549
- btn_extract.click(
550
- fn=extract_sequence_cb,
551
- inputs=[structure_file],
552
- outputs=[protein_seq]
553
- )
554
- btn_infer.click(
555
- fn=inference_cb,
556
- inputs=[protein_seq, drug_seq, drug_idx],
557
- outputs=[output_html]
558
- )
559
- clear_btn.click(
560
- fn=lambda: ("", "", "", ""),
561
- inputs=[],
562
- outputs=[protein_seq, drug_seq, drug_idx, output_html]
563
- )
564
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
565
  if __name__ == "__main__":
566
- demo.launch(share=True)
 
1
+ import os, sys, argparse, tempfile, shutil, base64, io
2
+ from flask import Flask, request, render_template_string
3
+ from werkzeug.utils import secure_filename
4
+ from torch.utils.data import DataLoader
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  import selfies
6
  from rdkit import Chem
7
+ import app as gr
8
+
9
+ import torch
10
  import matplotlib
11
  matplotlib.use("Agg")
12
  import matplotlib.pyplot as plt
13
  from matplotlib import cm
14
  from typing import Optional
15
 
 
 
 
 
 
16
  from utils.drug_tokenizer import DrugTokenizer
17
+ from transformers import EsmForMaskedLM, EsmTokenizer, AutoModel
18
  from utils.metric_learning_models_att_maps import Pre_encoded, FusionDTI
19
  from utils.foldseek_util import get_struc_seq
20
 
21
+ # ───── global paths / args ──────────────────────────────────────
22
+ FOLDSEEK_BIN = shutil.which("foldseek")
23
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
24
+ sys.path.append("..")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  def parse_config():
27
  p = argparse.ArgumentParser()
28
+ p.add_argument("-f")
29
  p.add_argument("--prot_encoder_path", default="westlake-repl/SaProt_650M_AF2")
30
  p.add_argument("--drug_encoder_path", default="HUBioDataLab/SELFormer")
31
+ p.add_argument("--agg_mode", default="mean_all_tok", type=str, help="{cls|mean|mean_all_tok}")
32
  p.add_argument("--group_size", type=int, default=1)
33
+ p.add_argument("--lr", type=float, default=1e-4)
34
  p.add_argument("--fusion", default="CAN")
35
  p.add_argument("--device", default="cuda" if torch.cuda.is_available() else "cpu")
36
  p.add_argument("--save_path_prefix", default="save_model_ckp/")
 
40
  args = parse_config()
41
  DEVICE = args.device
42
 
43
+ # ───── tokenisers & encoders ────────────────────────────────────
44
  prot_tokenizer = EsmTokenizer.from_pretrained(args.prot_encoder_path)
45
  prot_model = EsmForMaskedLM.from_pretrained(args.prot_encoder_path)
46
+
47
+ drug_tokenizer = DrugTokenizer() # SELFIES
48
  drug_model = AutoModel.from_pretrained(args.drug_encoder_path)
 
49
 
50
+ encoding = Pre_encoded(prot_model, drug_model, args).to(DEVICE)
51
+
52
+ # ─── collate fn ────────────────────────────────────────────────
53
  def collate_fn(batch):
54
  query1, query2, scores = zip(*batch)
55
 
 
75
  attention_mask2 = query_encodings2["attention_mask"].bool()
76
 
77
  return query_encodings1["input_ids"], attention_mask1, query_encodings2["input_ids"], attention_mask2, scores
78
+ # def collate_fn_batch_encoding(batch):
79
+
80
+ def smiles_to_selfies(smiles: str) -> Optional[str]:
81
+ try:
82
+ mol = Chem.MolFromSmiles(smiles)
83
+ if mol is None:
84
+ return None
85
+ selfies_str = selfies.encoder(smiles)
86
+ return selfies_str
87
+ except Exception:
88
+ return None
89
 
90
 
91
+ # ───── single-case embedding ───────────────────────────────────
92
  def get_case_feature(model, loader):
93
  model.eval()
94
  with torch.no_grad():
 
100
  p_ids.cpu(), d_ids.cpu(),
101
  p_mask.cpu(), d_mask.cpu(), None)]
102
 
103
+ # ───── helper: filter out special tokens ───────────────────────────────────
104
+ def clean_tokens(ids, tokenizer):
105
+ toks = tokenizer.convert_ids_to_tokens(ids.tolist())
106
+ return [t for t in toks if t not in tokenizer.all_special_tokens]
107
+
108
+ # ───── visualisation ───────────────────────────────────────────
109
+
110
  def visualize_attention(model, feats, drug_idx: Optional[int] = None) -> str:
111
  """
112
  Render a Protein → Drug cross-attention heat-map and, optionally, a
113
+ Top-20 protein-residue table for a chosen drug-token index.
114
 
115
  The token index shown on the x-axis (and accepted via *drug_idx*) is **the
116
  position of that token in the *original* drug sequence**, *after* the
 
209
  plt.close(fig)
210
  html = f'<img src="data:image/png;base64,{base64.b64encode(buf.getvalue()).decode()}" />'
211
 
212
+ # ───────────────────── build the Top-20 table (if requested) ─────────────────────
213
+ table_html = "" # start with an empty string so the final concatenation is uniform
214
  if drug_idx is not None:
215
  # map original 0-based drug_idx → current column position
216
  if (drug_idx + 1) in d_indices:
 
222
 
223
  if col_pos is not None:
224
  col_vec = attn[:, col_pos]
225
+ topk = torch.topk(col_vec, k=min(20, len(col_vec))).indices.tolist()
226
 
227
  rank_hdr = "".join(f"<th>{r+1}</th>" for r in range(len(topk)))
228
  res_row = "".join(f"<td>{p_tokens[i]}</td>" for i in topk)
 
230
 
231
  drug_tok_text = d_tokens[col_pos]
232
  orig_idx = d_indices[col_pos]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
+ table_html = (
235
+ f"<h4 style='margin-bottom:6px'>"
236
+ f"Drug token #{orig_idx} <code>{drug_tok_text}</code> "
237
+ f"โ†’ Top-20 Protein residues</h4>"
238
+ "<table class='tg' style='margin-bottom:8px'>"
239
+ f"<tr><th>Rank</th>{rank_hdr}</tr>"
240
+ f"<tr><td>Residue</td>{res_row}</tr>"
241
+ f"<tr><td>Position</td>{pos_row}</tr>"
242
+ "</table>")
243
+
244
+ # ────────────────── build the zoomable + downloadable heat-map ────────────────────
245
  buf_png = io.BytesIO()
246
+ fig.savefig(buf_png, format="png", dpi=140) # preview (raster)
247
  buf_png.seek(0)
248
 
249
  buf_pdf = io.BytesIO()
250
+ fig.savefig(buf_pdf, format="pdf") # high-resolution download (vector)
251
  buf_pdf.seek(0)
252
  plt.close(fig)
253
 
 
255
  pdf_b64 = base64.b64encode(buf_pdf.getvalue()).decode()
256
 
257
  html_heat = (
258
+ f"<a href='data:image/png;base64,{png_b64}' target='_blank' "
259
+ f"title='Click to enlarge'>"
260
+ f"<img src='data:image/png;base64,{png_b64}' "
261
+ f"style='max-width:100%;height:auto;cursor:zoom-in' /></a>"
262
+ f"<div style='margin-top:6px'>"
263
+ f"<a href='data:application/pdf;base64,{pdf_b64}' "
264
+ f"download='attention_heatmap.pdf'>Download PDF</a></div>"
 
 
 
 
 
 
 
 
 
265
  )
266
 
267
+ # ───────────────────────── return the final HTML ─────────────────────────
268
  return table_html + html_heat
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
 
270
+ def inference(protein_seq, drug_seq, drug_idx, structure_file):
271
+ # —— this part now takes the file path from Gradio ——
272
+ if structure_file is not None and os.path.exists(structure_file.name):
273
+ tmp_structure_path = structure_file.name
274
+ else:
275
+ return "<p style='color:red'>่ฏทๅ…ˆไธŠไผ ไธ€ไธชๆœ‰ๆ•ˆ็š„ .pdb ๆˆ– .cif ๆ–‡ไปถใ€‚</p>"
276
+
277
+ # call foldseek
278
+ try:
279
+ parsed = get_struc_seq(FOLDSEEK_BIN, tmp_structure_path, ["A"], plddt_mask=False)
280
+ chain = next(iter(parsed))
281
+ protein_seq = parsed[chain][2]
282
+ except Exception as e:
283
+ return f"<p style='color:red'>Foldseek ๆๅ–ๅคฑ่ดฅ๏ผš{e}</p>"
284
+
285
+ # ───── Flask app ───────────────────────────────────────────────
286
+ app = Flask(__name__)
287
+
288
+ @app.route("/", methods=["GET", "POST"])
289
+ def index():
290
+ protein_seq = drug_seq = structure_seq = ""; result_html = None
291
+ tmp_structure_path = ""; drug_idx = None
292
+
293
+ if request.method == "POST":
294
+ drug_idx_raw = request.form.get("drug_idx", "")
295
+ drug_idx = int(drug_idx_raw)-1 if drug_idx_raw.isdigit() else None
296
+
297
+ struct = request.files.get("structure_file")
298
+ if struct and struct.filename:
299
+ tmp_dir = tempfile.mkdtemp(prefix="foldseek_")
300
+ safe_name = secure_filename(struct.filename)
301
+ tmp_structure_path = os.path.join(tmp_dir, safe_name)
302
+ struct.save(tmp_structure_path)
303
+ else:
304
+ tmp_structure_path = request.form.get("tmp_structure_path", "")
305
+
306
+ if "clear" in request.form:
307
+ protein_seq = drug_seq = structure_seq = ""; tmp_structure_path = ""
308
+
309
+ elif "confirm_structure" in request.form and tmp_structure_path:
310
+ try:
311
+ parsed_seqs = get_struc_seq(FOLDSEEK_BIN, tmp_structure_path, ["A"], plddt_mask=False)["A"]
312
+ seq, foldseek_seq, structure_seq = parsed_seqs # intent: remove the temp directory after use (no cleanup call is visible here)
313
+ except Exception as e:
314
+ result_html = (
315
+ "<p style='color:red'><strong>Foldseek failed to extract sequence "
316
+ f"from structure: {e}</strong></p>")
317
+ structure_seq = ""
318
+
319
+ protein_seq = structure_seq
320
+ drug_input = request.form.get("drug_sequence", "")
321
+ # Heuristically check if input is SMILES (not starting with [) and convert
322
+ if not drug_input.strip().startswith("["):
323
+ converted = smiles_to_selfies(drug_input.strip())
324
+ if converted:
325
+ drug_seq = converted
326
+ else:
327
+ drug_seq = ""
328
+ result_html = "<p style='color:red'><strong>Failed to convert SMILES to SELFIES. Please check the input string.</strong></p>"
329
+ else:
330
+ drug_seq = drug_input
331
+
332
+ elif "Inference" in request.form:
333
+ protein_seq = request.form.get("protein_sequence", "")
334
+ drug_seq = request.form.get("drug_sequence", "")
335
+ if protein_seq and drug_seq:
336
+ loader = DataLoader([(protein_seq, drug_seq, 1)], batch_size=1,
337
+ collate_fn=collate_fn)
338
+ feats = get_case_feature(encoding, loader)
339
+ model = FusionDTI(446, 768, args).to(DEVICE)
340
+ ckpt = os.path.join(f"{args.save_path_prefix}{args.dataset}_{args.fusion}",
341
+ "best_model.ckpt")
342
+ if os.path.isfile(ckpt):
343
+ model.load_state_dict(torch.load(ckpt, map_location=DEVICE))
344
+ result_html = visualize_attention(model, feats, drug_idx)
345
+
346
+ return render_template_string(
347
+ # ───────────── HTML (original UI + new input box) ─────────────
348
+ """
349
+ <!doctype html>
350
+ <html lang="en"><head><meta charset="utf-8"><title>FusionDTI </title>
351
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=Poppins:wght@500;600&display=swap" rel="stylesheet">
352
+
353
+ <style>
354
+ :root{--bg:#f3f4f6;--card:#fff;--primary:#6366f1;--primary-dark:#4f46e5;--text:#111827;--border:#e5e7eb;}
355
+ *{box-sizing:border-box;margin:0;padding:0}
356
+ body{background:var(--bg);color:var(--text);font-family:Inter,system-ui,Arial,sans-serif;line-height:1.5;padding:32px 12px;}
357
+ h1{font-family:Poppins,Inter,sans-serif;font-weight:600;font-size:1.7rem;text-align:center;margin-bottom:28px;letter-spacing:-.2px;}
358
+ .card{max-width:1000px;margin:0 auto;background:var(--card);border:1px solid var(--border);
359
+ border-radius:12px;box-shadow:0 2px 6px rgba(0,0,0,.05);padding:32px 36px;}
360
+ label{font-weight:500;margin-bottom:6px;display:block}
361
+ textarea,input[type=file]{width:100%;font-size:.9rem;font-family:monospace;padding:10px 12px;
362
+ border:1px solid var(--border);border-radius:8px;background:#fff;resize:vertical;}
363
+ textarea{min-height:90px}
364
+ .btn{appearance:none;border:none;cursor:pointer;padding:12px 22px;border-radius:8px;font-weight:500;
365
+ font-family:Inter,sans-serif;transition:all .18s ease;color:#fff;}
366
+ .btn-primary{background:var(--primary)}.btn-primary:hover{background:var(--primary-dark)}
367
+ .btn-neutral{background:#9ca3af;}.btn-neutral:hover{background:#6b7280}
368
+ .grid{display:grid;gap:22px}.grid-2{grid-template-columns:1fr 1fr}
369
+ .vis-box{margin-top:28px;border:1px solid var(--border);border-radius:10px;overflow:auto;max-height:72vh;}
370
+ pre{white-space:pre-wrap;word-break:break-all;font-family:monospace;margin-top:8px}
371
+
372
+ /* โ”€โ”€ tidy table for Top-20 list โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ */
373
+ table.tg{border-collapse:collapse;margin-top:4px;font-size:0.83rem}
374
+ table.tg th,table.tg td{border:1px solid var(--border);padding:6px 8px;text-align:left}
375
+ table.tg th{background:var(--bg);font-weight:600}
376
+ </style>
377
+ </head>
378
+ <body>
379
+ <h1> Token-level Visualiser for Drug-Target Interaction</h1>
380
+
381
+ <!-- โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Project Links (larger + spaced) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ -->
382
+ <div style="margin-top:24px; text-align:center;">
383
+ <a href="https://zhaohanm.github.io/FusionDTI.github.io/" target="_blank"
384
+ style="display:inline-block;margin:8px 18px;padding:10px 20px;
385
+ background:linear-gradient(to right,#10b981,#059669);color:white;
386
+ font-weight:600;border-radius:8px;font-size:0.9rem;
387
+ font-family:Inter,sans-serif;text-decoration:none;
388
+ box-shadow:0 2px 6px rgba(0,0,0,0.12);transition:all 0.2s ease-in-out;"
389
+ onmouseover="this.style.opacity='0.9'" onmouseout="this.style.opacity='1'">
390
+ ๐ŸŒ Project Page
391
+ </a>
392
+
393
+ <a href="https://arxiv.org/abs/2406.01651" target="_blank"
394
+ style="display:inline-block;margin:8px 18px;padding:10px 20px;
395
+ background:linear-gradient(to right,#ef4444,#dc2626);color:white;
396
+ font-weight:600;border-radius:8px;font-size:0.9rem;
397
+ font-family:Inter,sans-serif;text-decoration:none;
398
+ box-shadow:0 2px 6px rgba(0,0,0,0.12);transition:all 0.2s ease-in-out;"
399
+ onmouseover="this.style.opacity='0.9'" onmouseout="this.style.opacity='1'">
400
+ 📄 ArXiv: 2406.01651
401
+ </a>
402
+
403
+ <a href="https://github.com/ZhaohanM/FusionDTI" target="_blank"
404
+ style="display:inline-block;margin:8px 18px;padding:10px 20px;
405
+ background:linear-gradient(to right,#3b82f6,#2563eb);color:white;
406
+ font-weight:600;border-radius:8px;font-size:0.9rem;
407
+ font-family:Inter,sans-serif;text-decoration:none;
408
+ box-shadow:0 2px 6px rgba(0,0,0,0.12);transition:all 0.2s ease-in-out;"
409
+ onmouseover="this.style.opacity='0.9'" onmouseout="this.style.opacity='1'">
410
+ 💻 GitHub Repo
411
+ </a>
412
+ </div>
413
+
414
+ <!-- ───────────── Guidelines for Use ───────────── -->
415
+ <div class="card" style="margin-bottom:24px">
416
+ <h2 style="font-size:1.2rem;margin-bottom:14px">Guidelines for Use</h2>
417
+ <ul style="margin-left:18px;line-height:1.55;list-style:decimal;">
418
+ <li><strong>Convert protein structure into a structure-aware sequence:</strong>
419
+ Upload a <code>.pdb</code> or <code>.cif</code> file. A structure-aware
420
+ sequence will be generated using
421
+ <a href="https://github.com/steineggerlab/foldseek" target="_blank">Foldseek</a>,
422
+ based on 3D structures from
423
+ <a href="https://alphafold.ebi.ac.uk" target="_blank">AlphaFold&nbsp;DB</a> or the
424
+ <a href="https://www.rcsb.org" target="_blank">Protein Data Bank (PDB)</a>.</li>
425
+
426
+ <li><strong>If you only have an amino acid sequence or a UniProt ID,</strong>
427
+ you must first visit the
428
+ <a href="https://www.rcsb.org" target="_blank">Protein Data Bank (PDB)</a>
429
+ or <a href="https://alphafold.ebi.ac.uk" target="_blank">AlphaFold&nbsp;DB</a>
430
+ to search and download the corresponding <code>.cif</code> or <code>.pdb</code> file.</li>
431
+
432
+ <li><strong>Drug input supports both SELFIES and SMILES:</strong><br>
433
+ You can enter a SELFIES string directly, or paste a SMILES string.
434
+ SMILES will be automatically converted to SELFIES using
435
+ <a href="https://github.com/aspuru-guzik-group/selfies" target="_blank">SELFIES encoder</a>.
436
+ If conversion fails, a red error message will be displayed.</li>
437
+
438
+ <li>Optionally enter a <strong>1-based</strong> drug atom or substructure index
439
+ to highlight the Top-10 interacting protein residues.</li>
440
+
441
+ <li>After inference, you can use the
442
+ “Download PDF” link to export a high-resolution vector version.</li>
443
+ </ul>
444
+ </div>
445
+
446
+ <div class="card">
447
+ <form method="POST" enctype="multipart/form-data" class="grid">
448
+
449
+ <div><label>Protein Structure (.pdb / .cif)</label>
450
+ <input type="file" name="structure_file">
451
+ <input type="hidden" name="tmp_structure_path" value="{{ tmp_structure_path }}"></div>
452
+
453
+ <div><label>Protein Sequence</label>
454
+ <textarea name="protein_sequence" placeholder="Confirm / paste sequence…">{{ protein_seq }}</textarea></div>
455
+
456
+ <div><label>Drug Sequence (SELFIES/SMILES)</label>
457
+ <textarea name="drug_sequence" placeholder="[C][C][O] / CCO …">{{ drug_seq }}</textarea></div>
458
+
459
+ <label>Drug atom/substructure index (1-based) – show Top-10 related protein residues</label>
460
+ <input type="number" name="drug_idx" min="1" style="width:120px">
461
+
462
+ <div class="grid grid-2">
463
+ <button class="btn btn-primary" type="submit" name="confirm_structure">Confirm Structure</button>
464
+ <button class="btn btn-primary" type="submit" name="Inference">Inference</button>
465
+ </div>
466
+ <button class="btn btn-neutral" style="width:100%" type="submit" name="clear">Clear</button>
467
+ </form>
468
+
469
+ {% if structure_seq %}
470
+ <div style="margin-top:18px"><strong>Structure-aware sequence:</strong><pre>{{ structure_seq }}</pre></div>
471
+ {% endif %}
472
+ {% if result_html %}
473
+ <div class="vis-box" style="margin-top:26px">{{ result_html|safe }}</div>
474
+ {% endif %}
475
+ </div></body></html>
476
+ """,
477
+ protein_seq=protein_seq, drug_seq=drug_seq, structure_seq=structure_seq,
478
+ result_html=result_html, tmp_structure_path=tmp_structure_path)
479
+
480
+ # ───── run ─────────────────────────────────────────────────────
481
if __name__ == "__main__":
    # Script entry point: start the built-in development server of `app`
    # (presumably the Flask application defined earlier in this file) on all
    # network interfaces, port 7860.
    #
    # SECURITY: the interactive debugger that `debug=True` enables lets any
    # client that can reach the server execute arbitrary Python. Because the
    # server binds to 0.0.0.0, debug mode must never be on unconditionally;
    # it is now opt-in through the FLASK_DEBUG environment variable
    # (accepted truthy values: 1, true, yes, on — case-insensitive).
    import os

    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(debug=debug_enabled, host="0.0.0.0", port=7860)