# Spaces: Sleeping
import streamlit as st | |
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline | |
from PIL import Image | |
import pytesseract | |
import PyPDF2 | |
import pdfplumber | |
import torch | |
# Load ClinicalBERT model for medical text analysis
@st.cache_resource
def load_clinicalbert():
    """Build the zero-shot classification pipeline backed by Bio_ClinicalBERT.

    Streamlit re-executes this script from the top on every widget
    interaction. Wrapping the loader in ``st.cache_resource`` means the
    tokenizer and model weights are loaded once and the same pipeline object
    is handed back on later reruns instead of being rebuilt each time.

    Returns:
        A ``transformers`` pipeline for the "zero-shot-classification" task,
        placed on GPU 0 when CUDA is available and on the CPU otherwise.
    """
    model_name = "emilyalsentzer/Bio_ClinicalBERT"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    # NOTE(review): the zero-shot pipeline is designed around checkpoints
    # fine-tuned for NLI (entailment / contradiction). Confirm that this
    # checkpoint ships a trained sequence-classification head; if it does
    # not, the label scores produced downstream are not meaningful.
    device = 0 if torch.cuda.is_available() else -1  # -1 selects the CPU
    return pipeline(
        "zero-shot-classification",
        model=model,
        tokenizer=tokenizer,
        device=device,
    )


# Initialize ClinicalBERT classifier (served from the cache after the first run)
clinical_bert = load_clinicalbert()
# OCR for Image using Tesseract
def extract_text_from_image(image):
    """Run Tesseract OCR over *image* and return the recognised text.

    Args:
        image: The image to read. The caller in this file passes the object
            returned by ``PIL.Image.open``.

    Returns:
        Whatever ``pytesseract.image_to_string`` produces for the image.
    """
    recognised = pytesseract.image_to_string(image)
    return recognised
# Extract text from PDF using pdfplumber
def extract_text_from_pdf(pdf_file):
    """Return the text of every page in *pdf_file*, one page per line group.

    Args:
        pdf_file: Anything ``pdfplumber.open`` accepts. The caller in this
            file passes the object returned by ``st.file_uploader``.

    Returns:
        The page texts joined with a newline. Pages that yield no text are
        skipped, so the result is ``""`` when nothing could be extracted.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # Pull the text while the document is still open.
        page_texts = [page.extract_text() for page in pdf.pages]
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next; falsy results (None or "") are dropped.
    return "\n".join(page_text for page_text in page_texts if page_text)
def _labels_above_threshold(result, threshold):
    """Keep the labels of a zero-shot *result* scoring at least *threshold*.

    Falls back to the single best-scoring label so the list is never empty.
    """
    kept = [
        label
        for label, score in zip(result["labels"], result["scores"])
        if score >= threshold
    ]
    # The pipeline returns labels sorted by descending score, so the first
    # entry is the best candidate when nothing clears the threshold.
    return kept or result["labels"][:1]


# Analyze and interpret the medical report using ClinicalBERT
def analyze_medical_text(text, *, threshold=0.5):
    """Classify report *text* against three fixed sets of candidate labels.

    Args:
        text: The extracted report text to classify.
        threshold: Minimum per-label score for a label to be reported in
            ``"interpretation"`` and ``"recommendations"``. With
            ``multi_label=True`` the pipeline scores each label
            independently and returns *all* of them, so without this cut-off
            every candidate label would always be listed.

    Returns:
        A dict with three keys:

        * ``"summary"`` - the single best-scoring label among "summary",
          "overview" and "findings". This is a label, not generated text.
        * ``"interpretation"`` - list of interpretation labels that reach
          ``threshold`` (or the top one if none do), best first.
        * ``"recommendations"`` - list of recommendation labels selected the
          same way.
    """
    # Summarize the extracted text (single-label: only the winner is used)
    summary = clinical_bert(
        text,
        candidate_labels=["summary", "overview", "findings"],
        multi_label=False,
    )["labels"][0]

    # Provide detailed interpretation
    interpretation = clinical_bert(
        text,
        candidate_labels=["normal", "abnormal", "urgent", "needs follow-up", "critical condition"],
        multi_label=True,
    )

    # Provide actionable recommendations
    recommendations = clinical_bert(
        text,
        candidate_labels=["medication", "dietary change", "exercise", "follow-up with a doctor", "lifestyle change"],
        multi_label=True,
    )

    return {
        "summary": summary,
        "interpretation": _labels_above_threshold(interpretation, threshold),
        "recommendations": _labels_above_threshold(recommendations, threshold),
    }
# Streamlit UI: upload a report, extract its text, classify it, show the result.
st.title("Medical Lab Report Analyzer with ClinicalBERT")
st.write("Upload your medical lab report (PDF/Image) to get a summary and actionable insights using ClinicalBERT.")

uploaded_file = st.file_uploader("Choose a PDF/Image file", type=["pdf", "png", "jpg", "jpeg"])

if uploaded_file:
    file_type = uploaded_file.type

    # PDFs go through pdfplumber; every other accepted type is treated as an image.
    if file_type == "application/pdf":
        with st.spinner("Extracting text from PDF..."):
            extracted_text = extract_text_from_pdf(uploaded_file)
    else:
        with st.spinner("Extracting text from Image..."):
            image = Image.open(uploaded_file)
            extracted_text = extract_text_from_image(image)

    if not extracted_text.strip():
        # Whitespace-only output counts as "nothing extracted".
        st.error("No text could be extracted. Please try with a different file.")
    else:
        with st.spinner("Analyzing report using ClinicalBERT..."):
            result = analyze_medical_text(extracted_text)

        # Display the results
        st.subheader("Summary of the Report")
        st.write(result['summary'])

        # Both list-valued sections are rendered the same way: heading, then bullets.
        bullet_sections = (
            ("Interpretation of Results", 'interpretation'),
            ("Actionable Recommendations", 'recommendations'),
        )
        for heading, key in bullet_sections:
            st.subheader(heading)
            for entry in result[key]:
                st.write(f"- {entry.capitalize()}")