MedLabAnalysis / app.py
bravewiki's picture
Update app.py
914d63a verified
raw
history blame
3.02 kB
import io
import os
import tempfile
from typing import List, Tuple

import pytesseract
import streamlit as st
import torch
from pdf2image import convert_from_path
from PIL import Image
from transformers import VisionEncoderDecoderModel, AutoTokenizer, pipeline
# Initialize models and tokenizer
vision_model_name = "nlpconnect/vit-gpt2-image-captioning"
text_model_name = "peteparker456/medical_diagnosis_llama2"


@st.cache_resource
def _load_models():
    """Load the vision model, its tokenizer, and the text-generation pipeline.

    Streamlit re-executes this script from the top on every user interaction.
    Wrapping the loads in ``st.cache_resource`` means the objects are created
    once and reused on later reruns instead of being re-instantiated each time.

    Returns:
        A ``(vision_model, vision_tokenizer, text_pipeline)`` tuple.
    """
    vision = VisionEncoderDecoderModel.from_pretrained(vision_model_name)
    tokenizer = AutoTokenizer.from_pretrained(vision_model_name)
    generator = pipeline("text-generation", model=text_model_name)
    return vision, tokenizer, generator


# Load the vision and text models (module-level names kept for the rest of the script)
vision_model, vision_tokenizer, text_model = _load_models()

pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Path to Tesseract executable
# --- Page header and upload widget ---
st.title("Medical Lab Report Analyzer")
st.write(
    "Upload an image or PDF file of a medical lab report to get an interpretation, actionable recommendations, and additional insights."
)

# The uploader accepts the listed image extensions as well as PDF documents.
uploaded_file = st.file_uploader("Upload Image or PDF", type=["jpg", "jpeg", "png", "pdf"])
def extract_text_from_image(image: Image.Image) -> str:
    """Return the text that pytesseract recognises in *image*."""
    recognised = pytesseract.image_to_string(image)
    return recognised
def extract_text_from_pdf(pdf_path: str) -> str:
    """Convert the PDF at *pdf_path* to images and return their OCR text.

    Each image produced by ``convert_from_path`` is passed through
    ``extract_text_from_image``; the results are concatenated in order with
    no separator added between them.
    """
    pages = convert_from_path(pdf_path)
    return "".join(extract_text_from_image(page) for page in pages)
def generate_insights(text: str) -> List[Tuple[str, str]]:
    """Get interpretations and recommendations from the text.

    Args:
        text: Text extracted from the uploaded report. It is passed as the
            prompt to the module-level ``text_model`` text-generation pipeline.

    Returns:
        Three ``(title, body)`` pairs in display order: the model-generated
        interpretation, followed by two fixed advisory messages.
    """
    # The pipeline is given the raw string and handles tokenisation itself,
    # so no separate encode step with another model's tokenizer is needed.
    # NOTE(review): ``max_length`` presumably bounds prompt + generated tokens,
    # which would leave little room for output on long reports -- confirm and
    # consider ``max_new_tokens`` instead.
    output_text = text_model(text, max_length=1000)[0]["generated_text"]
    return [
        ("Report Interpretation", output_text),
        ("Actionable Recommendations", "Consult your physician for further tests if the values are abnormal."),
        ("Additional Insights", "Regular check-ups can help monitor and maintain healthy levels."),
    ]
# Process the uploaded file
if uploaded_file:
    file_type = uploaded_file.type
    file_name = uploaded_file.name
    st.write(f"Uploaded File: {file_name}")

    if file_type == "application/pdf":
        # extract_text_from_pdf takes a filesystem path, so the upload has to
        # be written to disk first. A private temporary directory keeps
        # concurrent sessions from overwriting each other's file and is
        # removed even if extraction raises.
        with tempfile.TemporaryDirectory() as tmp_dir:
            pdf_path = os.path.join(tmp_dir, "upload.pdf")
            with open(pdf_path, "wb") as f:
                f.write(uploaded_file.getvalue())
            extracted_text = extract_text_from_pdf(pdf_path)
    else:  # For image files
        image = Image.open(io.BytesIO(uploaded_file.getvalue()))
        extracted_text = extract_text_from_image(image)

    if extracted_text.strip():
        st.subheader("Extracted Text from Report")
        st.text_area("Lab Report Text", extracted_text, height=200)

        # Get lab report interpretation and recommendations
        st.subheader("Analysis & Insights")
        insights = generate_insights(extracted_text)
        for title, insight in insights:
            st.markdown(f"### {title}")
            st.write(insight)
    else:
        # OCR returned only whitespace (or nothing at all).
        st.error("No text found in the uploaded file. Please try another file.")