|
import gradio as gr
import torch
from transformers import AutoModel, AutoTokenizer
|
|
|
|
|
# Checkpoint identifier shared by the tokenizer and the encoder, so the two
# can never drift apart.
MODEL_NAME = "emilyalsentzer/Bio_ClinicalBERT"

# Both objects are created once at import time and reused by every request.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModel.from_pretrained(MODEL_NAME)
|
|
|
def embed_text(text):
    """Embed text as a mean-pooled vector of the model's last hidden state.

    Args:
        text: The text to embed. A single string yields one vector; a list
            of strings yields one vector per string.

    Returns:
        The pooled embedding converted with ``Tensor.tolist()``: a flat list
        of floats for a single input, a list of such lists for a batch.
    """
    # Give the tokenizer an explicit length limit. ``truncation=True`` on its
    # own falls back to the tokenizer's ``model_max_length``, which can be
    # effectively unbounded when a checkpoint ships no tokenizer config; an
    # over-long note would then overflow the model's position embeddings.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=model.config.max_position_embeddings,
    )

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    hidden = outputs.last_hidden_state

    # Average over real tokens only. For a single string the mask is all
    # ones, so this equals a plain mean; for a padded batch it keeps the
    # padding positions from diluting the shorter sequences' embeddings.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    token_counts = mask.sum(dim=1).clamp(min=1)  # guard against divide-by-zero
    pooled = (hidden * mask).sum(dim=1) / token_counts

    # squeeze() drops the batch dimension when there is exactly one input.
    return pooled.squeeze().tolist()
|
|
|
# Five-line text box in which the user supplies the text to embed.
patient_text_box = gr.Textbox(lines=5, label="Enter patient text")

# Expose embed_text through a Gradio interface; the embedding is rendered
# with the "json" output component.
iface = gr.Interface(
    fn=embed_text,
    inputs=patient_text_box,
    outputs="json",
    title="Clinical Text Embedding API (Bio_ClinicalBERT)",
)

# Start serving the interface.
iface.launch()
|
|