import streamlit as st
from transformers import VisionEncoderDecoderModel, AutoTokenizer, pipeline
from pdf2image import convert_from_path
import pytesseract
from PIL import Image
import os
import io
from typing import List, Tuple

# Model identifiers
vision_model_name = "nlpconnect/vit-gpt2-image-captioning"
text_model_name = "peteparker456/medical_diagnosis_llama2"

# Load the models once and cache them so Streamlit does not reload them on every rerun.
# Note: the vision captioning model is loaded for completeness, but text is extracted
# below with Tesseract OCR rather than with the captioning model.
@st.cache_resource
def load_models():
    vision_model = VisionEncoderDecoderModel.from_pretrained(vision_model_name)
    vision_tokenizer = AutoTokenizer.from_pretrained(vision_model_name)
    text_model = pipeline("text-generation", model=text_model_name)
    return vision_model, vision_tokenizer, text_model

vision_model, vision_tokenizer, text_model = load_models()

pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Path to the Tesseract executable

# Streamlit UI
st.title("Medical Lab Report Analyzer")
st.write(
    "Upload an image or PDF file of a medical lab report to get an interpretation, actionable recommendations, and additional insights."
)

# Upload the image or PDF file
uploaded_file = st.file_uploader(
    "Upload Image or PDF", type=["jpg", "jpeg", "png", "pdf"]
)

def extract_text_from_image(image: Image.Image) -> str:
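    """Run Tesseract OCR on a PIL image and return the recognized text."""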
    return pytesseract.image_to_string(image)

def extract_text_from_pdf(pdf_path: str) -> str:
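    """Render each PDF page to an image with pdf2image, then OCR every page."""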
    images = convert_from_path(pdf_path)
    text = ""
    for img in images:
        text += extract_text_from_image(img)
    return text

def generate_insights(text: str) -> List[Tuple[str, str]]:
    """Generate an interpretation and recommendations from the extracted report text."""
    # The text-generation pipeline tokenizes internally, so the raw report text
    # is passed directly; max_length caps the prompt plus generated tokens.
    output_text = text_model(text, max_length=1000)[0]["generated_text"]

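    # The first entry is model-generated; the remaining two are static guidance.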
    return [
        ("Report Interpretation", output_text),
        ("Actionable Recommendations", "Consult your physician for further tests if the values are abnormal."),
        ("Additional Insights", "Regular check-ups can help monitor and maintain healthy levels.")
    ]

# Process the uploaded file
if uploaded_file:
    file_type = uploaded_file.type
    file_name = uploaded_file.name
    st.write(f"Uploaded File: {file_name}")

    if file_type == "application/pdf":
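        # pdf2image's convert_from_path reads from disk, so persist the upload to a temporary file.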
        with open("temp.pdf", "wb") as f:
            f.write(uploaded_file.getvalue())
        extracted_text = extract_text_from_pdf("temp.pdf")
        os.remove("temp.pdf")
    else:  # For image files
        image = Image.open(io.BytesIO(uploaded_file.getvalue()))
        extracted_text = extract_text_from_image(image)

    if extracted_text.strip():
        st.subheader("Extracted Text from Report")
        st.text_area("Lab Report Text", extracted_text, height=200)

        # Get lab report interpretation and recommendations
        st.subheader("Analysis & Insights")
        insights = generate_insights(extracted_text)

        for title, insight in insights:
            st.markdown(f"### {title}")
            st.write(insight)
    else:
        st.error("No text found in the uploaded file. Please try another file.")