bravewiki committed
Commit 914d63a · verified · 1 Parent(s): 0fe3ed9

Update app.py

Files changed (1): app.py (+59 -72)
app.py CHANGED
@@ -1,95 +1,82 @@
  import streamlit as st
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
- from transformers import AutoModelForSequenceClassification
- from PIL import Image
- import pytesseract
- import pdfplumber
  import torch

- # Load BART for zero-shot classification and Bio_ClinicalBERT for text summarization
- @st.cache_resource
- def load_models():
-     # Bio_ClinicalBERT for text summarization
-     tokenizer_bert = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
-     model_bert = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
-     summarizer = pipeline("summarization", model=model_bert, tokenizer=tokenizer_bert, device=0 if torch.cuda.is_available() else -1)

-     # BART model for zero-shot classification
-     classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=0 if torch.cuda.is_available() else -1)

-     return summarizer, classifier

- summarizer, classifier = load_models()

- # OCR for Image using Tesseract
- def extract_text_from_image(image):
      return pytesseract.image_to_string(image)

- # Extract text from PDF using pdfplumber
- def extract_text_from_pdf(pdf_file):
      text = ""
-     with pdfplumber.open(pdf_file) as pdf:
-         for page in pdf.pages:
-             text += page.extract_text() or ""
      return text

- # Analyze and interpret the medical report
- def analyze_medical_text(text):
-     # Summarize the extracted text using ClinicalBERT
-     summarized_text = summarizer(text, max_length=100, min_length=30, do_sample=False)[0]['summary_text']
-
-     # Use BART for classification insights
-     interpretation = classifier(
-         summarized_text,
-         candidate_labels=["normal", "abnormal", "urgent", "needs follow-up", "critical condition"],
-         multi_label=True
-     )

-     recommendations = classifier(
-         summarized_text,
-         candidate_labels=["medication", "dietary change", "exercise", "follow-up with a doctor", "lifestyle change"],
-         multi_label=True
-     )
-
-     return {
-         "summary": summarized_text,
-         "interpretation": interpretation['labels'],
-         "recommendations": recommendations['labels']
-     }
-
- # Streamlit UI
- st.title("Medical Lab Report Analyzer with ClinicalBERT and BART")
- st.write("Upload your medical lab report (PDF/Image) to get a summary and actionable insights.")
-
- uploaded_file = st.file_uploader("Choose a PDF/Image file", type=["pdf", "png", "jpg", "jpeg"])

  if uploaded_file:
      file_type = uploaded_file.type

-     # Extract text based on file type
      if file_type == "application/pdf":
-         with st.spinner("Extracting text from PDF..."):
-             extracted_text = extract_text_from_pdf(uploaded_file)
-     else:
-         with st.spinner("Extracting text from Image..."):
-             image = Image.open(uploaded_file)
-             extracted_text = extract_text_from_image(image)

-     # Analyze the extracted text
      if extracted_text.strip():
-         with st.spinner("Analyzing report using ClinicalBERT..."):
-             result = analyze_medical_text(extracted_text)
-
-         # Display the results
-         st.subheader("Summary of the Report")
-         st.write(result['summary'])

-         st.subheader("Interpretation of Results")
-         for label in result['interpretation']:
-             st.write(f"- {label.capitalize()}")

-         st.subheader("Actionable Recommendations")
-         for rec in result['recommendations']:
-             st.write(f"- {rec.capitalize()}")
      else:
-         st.error("No text could be extracted. Please try with a different file.")
 
  import streamlit as st
  import torch
+ from transformers import VisionEncoderDecoderModel, AutoTokenizer, pipeline
+ from pdf2image import convert_from_path
+ import pytesseract
+ from PIL import Image
+ import os
+ import io
+ from typing import List, Tuple

+ # Initialize models and tokenizer
+ vision_model_name = "nlpconnect/vit-gpt2-image-captioning"
+ text_model_name = "peteparker456/medical_diagnosis_llama2"

+ # Load the vision and text models
+ vision_model = VisionEncoderDecoderModel.from_pretrained(vision_model_name)
+ vision_tokenizer = AutoTokenizer.from_pretrained(vision_model_name)
+ text_model = pipeline("text-generation", model=text_model_name)

+ pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Path to Tesseract executable

+ # Streamlit UI
+ st.title("Medical Lab Report Analyzer")
+ st.write(
+     "Upload an image or PDF file of a medical lab report to get an interpretation, actionable recommendations, and additional insights."
+ )
+
+ # Upload the image or PDF file
+ uploaded_file = st.file_uploader(
+     "Upload Image or PDF", type=["jpg", "jpeg", "png", "pdf"]
+ )

+ def extract_text_from_image(image: Image.Image) -> str:
      return pytesseract.image_to_string(image)

+ def extract_text_from_pdf(pdf_path: str) -> str:
+     images = convert_from_path(pdf_path)
      text = ""
+     for img in images:
+         text += extract_text_from_image(img)
      return text

+ def generate_insights(text: str) -> List[Tuple[str, str]]:
+     """Get interpretations and recommendations from the text."""
+     # Create a dummy input for the text model
+     inputs = vision_tokenizer.encode(text, return_tensors="pt", max_length=1000, truncation=True)
+     output_text = text_model(text, max_length=1000)[0]["generated_text"]

+     return [
+         ("Report Interpretation", output_text),
+         ("Actionable Recommendations", "Consult your physician for further tests if the values are abnormal."),
+         ("Additional Insights", "Regular check-ups can help monitor and maintain healthy levels.")
+     ]

+ # Process the uploaded file
  if uploaded_file:
      file_type = uploaded_file.type
+     file_name = uploaded_file.name
+     st.write(f"Uploaded File: {file_name}")

      if file_type == "application/pdf":
+         with open("temp.pdf", "wb") as f:
+             f.write(uploaded_file.getvalue())
+         extracted_text = extract_text_from_pdf("temp.pdf")
+         os.remove("temp.pdf")
+     else:  # For image files
+         image = Image.open(io.BytesIO(uploaded_file.getvalue()))
+         extracted_text = extract_text_from_image(image)

      if extracted_text.strip():
+         st.subheader("Extracted Text from Report")
+         st.text_area("Lab Report Text", extracted_text, height=200)

+         # Get lab report interpretation and recommendations
+         st.subheader("Analysis & Insights")
+         insights = generate_insights(extracted_text)

+         for title, insight in insights:
+             st.markdown(f"### {title}")
+             st.write(insight)
      else:
+         st.error("No text found in the uploaded file. Please try another file.")
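
Outside of Streamlit, the extraction-and-generation path introduced by this commit can be exercised from the command line. The sketch below is illustrative only and not part of the commit: it assumes Tesseract and Poppler are installed on the system, uses the placeholder file name sample_report.pdf, and the helper extract_text merely combines the app's two extraction functions.

# Minimal local sketch of the new OCR + text-generation path (not part of the commit).
# Assumes Tesseract and Poppler are installed; "sample_report.pdf" is a placeholder path.
import sys

import pytesseract
from pdf2image import convert_from_path
from PIL import Image
from transformers import pipeline


def extract_text(path: str) -> str:
    """OCR a PDF (page by page) or a single image file into plain text."""
    if path.lower().endswith(".pdf"):
        pages = convert_from_path(path)  # render each PDF page to a PIL image
        return "".join(pytesseract.image_to_string(page) for page in pages)
    return pytesseract.image_to_string(Image.open(path))


if __name__ == "__main__":
    report_path = sys.argv[1] if len(sys.argv) > 1 else "sample_report.pdf"
    text = extract_text(report_path)
    if not text.strip():
        sys.exit("No text could be extracted from the file.")

    # Same text-generation pipeline the app loads at startup.
    generator = pipeline("text-generation", model="peteparker456/medical_diagnosis_llama2")
    print(generator(text, max_length=1000)[0]["generated_text"])

In the app itself, the upload is written to temp.pdf before calling extract_text_from_pdf because convert_from_path expects a path on disk; pdf2image's convert_from_bytes is an alternative when the upload is kept in memory.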