import gradio as gr
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Load the tokenizer and a token-classification model from the same checkpoint.
# Both loads run when this module is imported/executed.
# NOTE(review): "dmis-lab/biobert-v1.1" looks like the base pretrained BioBERT
# checkpoint rather than one fine-tuned for NER. If so, the token-classification
# head is freshly initialised and the predicted labels are not meaningful —
# confirm, and swap in an NER-fine-tuned checkpoint if needed.
model_name = "dmis-lab/biobert-v1.1"  # checkpoint identifier passed to from_pretrained
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# Create the NER pipeline used by bio_ner(). aggregation_strategy="simple" is
# requested so that results are reported per entity group; bio_ner() reads the
# "start", "end" and "entity_group" keys of each result.
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

# Inference function
def bio_ner(text):
    """Annotate recognised entities in *text* with inline ``[span](LABEL)`` markers.

    The text is run through the module-level ``ner_pipeline``. Each entity
    the pipeline reports is rewritten as ``[span](LABEL)``, where ``span`` is
    the exact substring of *text* and ``LABEL`` is the entity group; all text
    between entities is copied through unchanged.

    Args:
        text: Raw text to tag. ``None`` is treated as empty input.

    Returns:
        The annotated string. Empty or whitespace-only input is returned
        unchanged (``None`` becomes ``""``) without invoking the model.
    """
    if text is None:
        return ""
    if not text.strip():
        # Nothing to tag, so skip the model call entirely.
        return text

    pieces = []
    cursor = 0  # index in `text` up to which output has already been emitted
    for ent in ner_pipeline(text):
        start, end = ent.get("start"), ent.get("end")
        # Character offsets are only available when the tokenizer provides
        # them; slicing with None would silently duplicate the input text.
        if start is None or end is None:
            continue
        # An entity starting before the previous one ended would move the
        # cursor backwards and repeat text, so it is skipped.
        if start < cursor:
            continue
        label = ent["entity_group"]
        pieces.append(text[cursor:start])
        pieces.append(f"[{text[start:end]}]({label})")
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces)

# Web UI: a five-line text box feeds bio_ner(), whose annotated string is
# shown back as plain text.
input_box = gr.Textbox(lines=5, placeholder="Enter biomedical text here...")

demo = gr.Interface(
    fn=bio_ner,
    inputs=input_box,
    outputs="text",
    title="🧬 BioBERT NER",
    description="Uses BioBERT to perform Named Entity Recognition on biomedical text.",
)

# Launch the interface as soon as the script is executed.
demo.launch()