# Spaces: Sleeping (hosting-page status text captured with the source; not part of the program)
import io
import os
import tempfile
from typing import List, Tuple

import pytesseract
import streamlit as st
import torch
from pdf2image import convert_from_path
from PIL import Image
from transformers import VisionEncoderDecoderModel, AutoTokenizer, pipeline
# Initialize models and tokenizer
vision_model_name = "nlpconnect/vit-gpt2-image-captioning"
text_model_name = "peteparker456/medical_diagnosis_llama2"


@st.cache_resource
def _load_models():
    """Load the vision model, its tokenizer and the text-generation pipeline.

    Streamlit re-executes this script from the top on every user
    interaction. Without caching, each rerun would reload all weights;
    ``st.cache_resource`` keeps a single shared copy alive for the process.

    Returns:
        A ``(vision_model, vision_tokenizer, text_model)`` tuple.
    """
    return (
        VisionEncoderDecoderModel.from_pretrained(vision_model_name),
        AutoTokenizer.from_pretrained(vision_model_name),
        pipeline("text-generation", model=text_model_name),
    )


# Load the vision and text models (served from the cache after the first run)
vision_model, vision_tokenizer, text_model = _load_models()

pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Path to Tesseract executable
# Page header and short description of what the app does
_INTRO_TEXT = "Upload an image or PDF file of a medical lab report to get an interpretation, actionable recommendations, and additional insights."
_ACCEPTED_EXTENSIONS = ["jpg", "jpeg", "png", "pdf"]

st.title("Medical Lab Report Analyzer")
st.write(_INTRO_TEXT)

# File picker for the report; the rest of the script reads `uploaded_file`
uploaded_file = st.file_uploader("Upload Image or PDF", type=_ACCEPTED_EXTENSIONS)
def extract_text_from_image(image: Image.Image) -> str:
    """Return the text that ``pytesseract.image_to_string`` reads from ``image``."""
    recognized = pytesseract.image_to_string(image)
    return recognized
def extract_text_from_pdf(pdf_path: str) -> str:
    """OCR every image that ``convert_from_path`` produces for ``pdf_path``.

    Each image is passed through ``extract_text_from_image`` and the
    results are concatenated, without any separator, in the order the
    images were returned. An empty image list yields an empty string.
    """
    pages = convert_from_path(pdf_path)
    return "".join(extract_text_from_image(page) for page in pages)
def generate_insights(text: str) -> List[Tuple[str, str]]:
    """Get interpretations and recommendations from the text.

    Args:
        text: Report text, passed unchanged to the module-level
            ``text_model`` text-generation pipeline as the prompt.

    Returns:
        Three ``(title, body)`` pairs. Only the first body comes from the
        model; the other two are fixed advisory strings.
    """
    # The previous version also encoded `text` with the vision model's
    # tokenizer and discarded the result; that unused work is removed.
    # NOTE(review): per the transformers generation docs, `max_length` bounds
    # prompt + generated tokens together, so a very long report leaves little
    # or no room for output - confirm whether `max_new_tokens` is intended.
    output_text = text_model(text, max_length=1000)[0]["generated_text"]
    return [
        ("Report Interpretation", output_text),
        ("Actionable Recommendations", "Consult your physician for further tests if the values are abnormal."),
        ("Additional Insights", "Regular check-ups can help monitor and maintain healthy levels."),
    ]
# Process the uploaded file: OCR it, show the extracted text, then the analysis
if uploaded_file:
    file_type = uploaded_file.type
    file_name = uploaded_file.name
    st.write(f"Uploaded File: {file_name}")

    if file_type == "application/pdf":
        # extract_text_from_pdf needs a filesystem path. A uniquely named
        # temporary file avoids the fixed "temp.pdf" that concurrent sessions
        # would overwrite; it is closed before being read so the path can be
        # reopened on every platform.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp_pdf:
            tmp_pdf.write(uploaded_file.getvalue())
            pdf_path = tmp_pdf.name
        try:
            extracted_text = extract_text_from_pdf(pdf_path)
        finally:
            # Remove the temporary file even if conversion or OCR raised.
            os.remove(pdf_path)
    else:  # For image files
        image = Image.open(io.BytesIO(uploaded_file.getvalue()))
        extracted_text = extract_text_from_image(image)

    if extracted_text.strip():
        st.subheader("Extracted Text from Report")
        st.text_area("Lab Report Text", extracted_text, height=200)

        # Get lab report interpretation and recommendations
        st.subheader("Analysis & Insights")
        insights = generate_insights(extracted_text)
        for title, insight in insights:
            st.markdown(f"### {title}")
            st.write(insight)
    else:
        st.error("No text found in the uploaded file. Please try another file.")