File size: 3,282 Bytes
c953483
28cb500
4fdc44f
28cb500
 
 
 
4fdc44f
28cb500
c953483
4fdc44f
28cb500
 
 
c953483
28cb500
 
c953483
28cb500
c953483
28cb500
c953483
28cb500
c953483
 
28cb500
 
 
c953483
 
28cb500
 
 
 
c953483
28cb500
 
 
 
 
 
 
 
 
 
 
 
 
c953483
 
 
28cb500
 
c953483
 
 
28cb500
 
c953483
 
 
 
 
 
 
 
 
 
 
 
28cb500
 
c953483
 
28cb500
 
 
 
c953483
28cb500
c953483
 
 
28cb500
 
c953483
 
28cb500
 
c953483
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
from PIL import Image
import pytesseract
import PyPDF2
import pdfplumber
import torch

# Load ClinicalBERT model for medical text analysis
@st.cache_resource
def load_clinicalbert():
    """Build the zero-shot classification pipeline used by this app.

    The tokenizer and a sequence-classification model are both loaded from
    the ``emilyalsentzer/Bio_ClinicalBERT`` checkpoint and wrapped in a
    ``transformers`` ``zero-shot-classification`` pipeline. The function is
    decorated with ``st.cache_resource``.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    # NOTE(review): zero-shot classification normally relies on an
    # NLI-fine-tuned classification head; confirm this checkpoint provides
    # one, otherwise the scores may not be meaningful -- TODO verify.
    checkpoint = "emilyalsentzer/Bio_ClinicalBERT"

    bert_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    bert_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

    # Device index handed to the pipeline: 0 when CUDA is available, else -1.
    if torch.cuda.is_available():
        device_index = 0
    else:
        device_index = -1

    return pipeline(
        "zero-shot-classification",
        model=bert_model,
        tokenizer=bert_tokenizer,
        device=device_index,
    )

# Module-level classifier instance, created when the script runs.
# analyze_medical_text calls this object directly; load_clinicalbert is
# decorated with st.cache_resource.
clinical_bert = load_clinicalbert()

# OCR for Image using Tesseract
def extract_text_from_image(image):
    """Run Tesseract OCR over an image and return the recognised text.

    Args:
        image: Forwarded unchanged to ``pytesseract.image_to_string``. The
            caller in this file passes the result of ``PIL.Image.open``.

    Returns:
        The value returned by ``pytesseract.image_to_string`` for ``image``.
    """
    ocr_output = pytesseract.image_to_string(image)
    return ocr_output

# Extract text from PDF using pdfplumber
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF using pdfplumber.

    Args:
        pdf_file: Anything ``pdfplumber.open`` accepts. The caller in this
            file passes the object returned by ``st.file_uploader``.

    Returns:
        str: The text of all pages that yielded text, separated by a single
        newline. Returns ``""`` when no page produced any text.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() may return None for a page; normalise that to "".
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Join with a newline so the last word of one page does not fuse with
    # the first word of the next; empty pages are dropped to avoid stray
    # blank lines.
    return "\n".join(page_text for page_text in page_texts if page_text)

# Analyze and interpret the medical report using ClinicalBERT
def analyze_medical_text(text, *, min_score=0.5):
    """Classify report text into a summary tag, interpretations and advice.

    Runs the module-level ``clinical_bert`` zero-shot classifier three times
    over ``text``, each time with a different set of candidate labels.

    Args:
        text: The report text to classify.
        min_score: Minimum score a label must reach to be kept in the
            multi-label results ("interpretation" and "recommendations").
            Defaults to 0.5.

    Returns:
        dict with three keys:
            "summary": the single top-ranked label among "summary",
                "overview" and "findings". This is a label, not generated
                summary text.
            "interpretation": list of interpretation labels whose score is
                at least ``min_score``; if none qualify, the top-ranked
                label alone.
            "recommendations": list of recommendation labels selected the
                same way.
    """
    # Single-label run: only the best-ranked label is kept.
    summary = clinical_bert(
        text,
        candidate_labels=["summary", "overview", "findings"],
        multi_label=False,
    )['labels'][0]

    # Multi-label runs score each candidate label independently.
    interpretation = clinical_bert(
        text,
        candidate_labels=["normal", "abnormal", "urgent", "needs follow-up", "critical condition"],
        multi_label=True,
    )

    recommendations = clinical_bert(
        text,
        candidate_labels=["medication", "dietary change", "exercise", "follow-up with a doctor", "lifestyle change"],
        multi_label=True,
    )

    # Returning every candidate label would always list all of them (even
    # contradictory ones such as "normal" and "abnormal"), so keep only the
    # labels that clear the score threshold.
    return {
        "summary": summary,
        "interpretation": _labels_above_threshold(interpretation, min_score),
        "recommendations": _labels_above_threshold(recommendations, min_score),
    }


def _labels_above_threshold(result, min_score):
    """Return labels from a classifier result scoring at least ``min_score``.

    ``result`` is expected to carry parallel ``'labels'`` and ``'scores'``
    sequences ordered from most to least likely. When no label qualifies,
    the first (top-ranked) label is returned so the list is never empty
    unless ``result['labels']`` itself is empty.
    """
    kept = [
        label
        for label, score in zip(result['labels'], result['scores'])
        if score >= min_score
    ]
    return kept or list(result['labels'][:1])

# Streamlit UI
st.title("Medical Lab Report Analyzer with ClinicalBERT")
st.write("Upload your medical lab report (PDF/Image) to get a summary and actionable insights using ClinicalBERT.")

uploaded_file = st.file_uploader("Choose a PDF/Image file", type=["pdf", "png", "jpg", "jpeg"])

# Everything below only runs once the uploader has returned a file.
if uploaded_file:
    # Route on the type reported for the upload: PDFs go to the PDF
    # extractor, anything else is opened as an image and sent through OCR.
    is_pdf = uploaded_file.type == "application/pdf"

    if not is_pdf:
        with st.spinner("Extracting text from Image..."):
            extracted_text = extract_text_from_image(Image.open(uploaded_file))
    else:
        with st.spinner("Extracting text from PDF..."):
            extracted_text = extract_text_from_pdf(uploaded_file)

    if not extracted_text.strip():
        # Whitespace-only output is treated as a failed extraction.
        st.error("No text could be extracted. Please try with a different file.")
    else:
        with st.spinner("Analyzing report using ClinicalBERT..."):
            result = analyze_medical_text(extracted_text)

        # The summary is a single value, written as-is.
        st.subheader("Summary of the Report")
        st.write(result['summary'])

        # The two list-valued sections render identically: a heading
        # followed by one capitalised bullet per entry.
        bullet_sections = (
            ("Interpretation of Results", "interpretation"),
            ("Actionable Recommendations", "recommendations"),
        )
        for heading, result_key in bullet_sections:
            st.subheader(heading)
            for entry in result[result_key]:
                st.write(f"- {entry.capitalize()}")