s1ome123 committed on
Commit
2b41fd4
·
verified ·
1 Parent(s): 232a9f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -8
app.py CHANGED
@@ -1,21 +1,24 @@
 
 
1
  import gradio as gr
2
- from sentence_transformers import SentenceTransformer
3
 
4
- # Load a model that outputs 1024-dim vectors
5
- model = SentenceTransformer('intfloat/e5-large')
 
6
 
7
  def embed_text(text):
8
- # Optionally prepend "passage: " if using e5 models
9
- processed_text = "passage: " + text.strip()
10
- embedding = model.encode(processed_text).tolist()
 
 
11
  return embedding
12
 
13
- # Gradio interface
14
  iface = gr.Interface(
15
  fn=embed_text,
16
  inputs=gr.Textbox(lines=5, label="Enter patient text"),
17
  outputs="json",
18
- title="Clinical Text Embedding API (1024-dim)"
19
  )
20
 
21
  iface.launch()
 
1
+ from transformers import AutoTokenizer, AutoModel
2
+ import torch
3
  import gradio as gr
 
4
 
5
+ # Load Bio_ClinicalBERT
6
+ tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
7
+ model = AutoModel.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
8
 
9
  def embed_text(text):
10
+ inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
11
+ with torch.no_grad():
12
+ outputs = model(**inputs)
13
+ # Mean pooling
14
+ embedding = outputs.last_hidden_state.mean(dim=1).squeeze().tolist()
15
  return embedding
16
 
 
17
  iface = gr.Interface(
18
  fn=embed_text,
19
  inputs=gr.Textbox(lines=5, label="Enter patient text"),
20
  outputs="json",
21
+ title="Clinical Text Embedding API (Bio_ClinicalBERT)"
22
  )
23
 
24
  iface.launch()