Spaces:

bravewiki
/

MedLabAnalysis

Sleeping

App Files Files Community

MedLabAnalysis / app.py

bravewiki

Update app.py

0fe3ed9 verified 10 months ago

raw

history blame

3.53 kB

	import streamlit as st
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
	from transformers import AutoModelForSequenceClassification
	from PIL import Image
	import pytesseract
	import pdfplumber
	import torch

	# Load the BART summarization pipeline and the BART-MNLI zero-shot classification pipeline.
	@st.cache_resource
	def load_models():
	    """Build the two Hugging Face pipelines used by the app.

	    Returns:
	        A ``(summarizer, classifier)`` tuple: a ``"summarization"`` pipeline
	        backed by ``facebook/bart-large-cnn`` and a
	        ``"zero-shot-classification"`` pipeline backed by
	        ``facebook/bart-large-mnli``.
	    """
	    # Use the first CUDA device when one is available, otherwise the CPU (-1).
	    device = 0 if torch.cuda.is_available() else -1

	    # The tokenizer must come from the same checkpoint as the model. Pairing
	    # the Bio_ClinicalBERT tokenizer with BART weights (as this code used to)
	    # feeds the model token ids from a different vocabulary.
	    summarization_checkpoint = "facebook/bart-large-cnn"
	    tokenizer_bart = AutoTokenizer.from_pretrained(summarization_checkpoint)
	    model_bart = AutoModelForSeq2SeqLM.from_pretrained(summarization_checkpoint)
	    summarizer = pipeline("summarization", model=model_bart, tokenizer=tokenizer_bart, device=device)

	    # BART fine-tuned on MNLI for zero-shot classification
	    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=device)

	    return summarizer, classifier

	# Create the shared pipelines at module level; load_models is wrapped in
	# st.cache_resource, and the analysis function below reads these two names.
	summarizer, classifier = load_models()

	# OCR for Image using Tesseract
	def extract_text_from_image(image):
	    """Run Tesseract OCR on *image* and return the recognized text.

	    Args:
	        image: The image to read; passed straight to
	            ``pytesseract.image_to_string``.

	    Returns:
	        Whatever ``pytesseract.image_to_string`` produces for the image.
	    """
	    recognized_text = pytesseract.image_to_string(image)
	    return recognized_text

	# Extract text from PDF using pdfplumber
	def extract_text_from_pdf(pdf_file):
	    """Return the text of every page of a PDF, concatenated in page order.

	    Args:
	        pdf_file: The PDF to read; passed straight to ``pdfplumber.open``.

	    Returns:
	        A single string. Pages for which ``extract_text()`` yields a falsy
	        value (for example ``None``) contribute an empty string.
	    """
	    with pdfplumber.open(pdf_file) as document:
	        page_texts = [page.extract_text() or "" for page in document.pages]
	    return "".join(page_texts)

	# Analyze and interpret the medical report
	def analyze_medical_text(text, score_threshold=0.5):
	    """Summarize a report and classify the summary with zero-shot labels.

	    Args:
	        text: Raw text extracted from the uploaded report.
	        score_threshold: Minimum zero-shot score a label needs to be
	            reported. Defaults to 0.5.

	    Returns:
	        A dict with three keys: ``"summary"`` (the generated summary),
	        ``"interpretation"`` and ``"recommendations"`` (lists of label
	        strings, highest score first).
	    """
	    # truncation=True clips over-long reports to the model's maximum input
	    # length instead of letting the summarizer fail on them.
	    summarized_text = summarizer(
	        text, max_length=100, min_length=30, do_sample=False, truncation=True
	    )[0]['summary_text']

	    interpretation = _confident_labels(
	        summarized_text,
	        ["normal", "abnormal", "urgent", "needs follow-up", "critical condition"],
	        score_threshold,
	    )
	    recommendations = _confident_labels(
	        summarized_text,
	        ["medication", "dietary change", "exercise", "follow-up with a doctor", "lifestyle change"],
	        score_threshold,
	    )

	    return {
	        "summary": summarized_text,
	        "interpretation": interpretation,
	        "recommendations": recommendations,
	    }


	def _confident_labels(sequence, candidate_labels, score_threshold):
	    """Return the candidate labels whose zero-shot score meets the threshold."""
	    outcome = classifier(sequence, candidate_labels=candidate_labels, multi_label=True)
	    # With multi_label=True the pipeline returns *every* candidate label
	    # (sorted by score), so the labels have to be filtered by their scores;
	    # otherwise contradictory labels such as "normal" and "critical
	    # condition" are both reported for every input.
	    ranked = list(zip(outcome['labels'], outcome['scores']))
	    kept = [label for label, score in ranked if score >= score_threshold]
	    # Never return an empty list: fall back to the single best-scoring label.
	    return kept or [label for label, _ in ranked[:1]]

	# Streamlit UI: page header, file upload, text extraction, analysis, and result rendering.
	st.title("Medical Lab Report Analyzer with ClinicalBERT and BART")
	st.write("Upload your medical lab report (PDF/Image) to get a summary and actionable insights.")

	uploaded_file = st.file_uploader("Choose a PDF/Image file", type=["pdf", "png", "jpg", "jpeg"])

	if uploaded_file:
	    # PDFs are parsed directly; every other accepted upload type goes through OCR.
	    if uploaded_file.type != "application/pdf":
	        with st.spinner("Extracting text from Image..."):
	            extracted_text = extract_text_from_image(Image.open(uploaded_file))
	    else:
	        with st.spinner("Extracting text from PDF..."):
	            extracted_text = extract_text_from_pdf(uploaded_file)

	    if not extracted_text.strip():
	        # Nothing but whitespace came out of the extraction step.
	        st.error("No text could be extracted. Please try with a different file.")
	    else:
	        with st.spinner("Analyzing report using ClinicalBERT..."):
	            result = analyze_medical_text(extracted_text)

	        # Render the summary first, then one bulleted section per label list.
	        st.subheader("Summary of the Report")
	        st.write(result['summary'])

	        label_sections = (
	            ("Interpretation of Results", 'interpretation'),
	            ("Actionable Recommendations", 'recommendations'),
	        )
	        for heading, result_key in label_sections:
	            st.subheader(heading)
	            for entry in result[result_key]:
	                st.write(f"- {entry.capitalize()}")