bravewiki committed
Commit 914d63a · verified · 1 Parent(s): 0fe3ed9

Update app.py

Files changed (1): app.py (+59 -72)
app.py CHANGED
@@ -1,95 +1,82 @@
  import streamlit as st
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
- from transformers import AutoModelForSequenceClassification
- from PIL import Image
- import pytesseract
- import pdfplumber
  import torch

- # Load BART for zero-shot classification and Bio_ClinicalBERT for text summarization
- @st.cache_resource
- def load_models():
-     # Bio_ClinicalBERT for text summarization
-     tokenizer_bert = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
-     model_bert = AutoModelForSeq2SeqLM.from_pretrained("facebook/bart-large-cnn")
-     summarizer = pipeline("summarization", model=model_bert, tokenizer=tokenizer_bert, device=0 if torch.cuda.is_available() else -1)

-     # BART model for zero-shot classification
-     classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=0 if torch.cuda.is_available() else -1)

-     return summarizer, classifier

- summarizer, classifier = load_models()

- # OCR for Image using Tesseract
- def extract_text_from_image(image):
      return pytesseract.image_to_string(image)

- # Extract text from PDF using pdfplumber
- def extract_text_from_pdf(pdf_file):
      text = ""
-     with pdfplumber.open(pdf_file) as pdf:
-         for page in pdf.pages:
-             text += page.extract_text() or ""
      return text

- # Analyze and interpret the medical report
- def analyze_medical_text(text):
-     # Summarize the extracted text using ClinicalBERT
-     summarized_text = summarizer(text, max_length=100, min_length=30, do_sample=False)[0]['summary_text']
-
-     # Use BART for classification insights
-     interpretation = classifier(
-         summarized_text,
-         candidate_labels=["normal", "abnormal", "urgent", "needs follow-up", "critical condition"],
-         multi_label=True
-     )

-     recommendations = classifier(
-         summarized_text,
-         candidate_labels=["medication", "dietary change", "exercise", "follow-up with a doctor", "lifestyle change"],
-         multi_label=True
-     )
-
-     return {
-         "summary": summarized_text,
-         "interpretation": interpretation['labels'],
-         "recommendations": recommendations['labels']
-     }
-
- # Streamlit UI
- st.title("Medical Lab Report Analyzer with ClinicalBERT and BART")
- st.write("Upload your medical lab report (PDF/Image) to get a summary and actionable insights.")
-
- uploaded_file = st.file_uploader("Choose a PDF/Image file", type=["pdf", "png", "jpg", "jpeg"])

  if uploaded_file:
      file_type = uploaded_file.type

-     # Extract text based on file type
      if file_type == "application/pdf":
-         with st.spinner("Extracting text from PDF..."):
-             extracted_text = extract_text_from_pdf(uploaded_file)
-     else:
-         with st.spinner("Extracting text from Image..."):
-             image = Image.open(uploaded_file)
-             extracted_text = extract_text_from_image(image)

-     # Analyze the extracted text
      if extracted_text.strip():
-         with st.spinner("Analyzing report using ClinicalBERT..."):
-             result = analyze_medical_text(extracted_text)
-
-         # Display the results
-         st.subheader("Summary of the Report")
-         st.write(result['summary'])

-         st.subheader("Interpretation of Results")
-         for label in result['interpretation']:
-             st.write(f"- {label.capitalize()}")

-         st.subheader("Actionable Recommendations")
-         for rec in result['recommendations']:
-             st.write(f"- {rec.capitalize()}")
      else:
-         st.error("No text could be extracted. Please try with a different file.")
 
  import streamlit as st
  import torch
+ from transformers import VisionEncoderDecoderModel, AutoTokenizer, pipeline
+ from pdf2image import convert_from_path
+ import pytesseract
+ from PIL import Image
+ import os
+ import io
+ from typing import List, Tuple

+ # Initialize models and tokenizer
+ vision_model_name = "nlpconnect/vit-gpt2-image-captioning"
+ text_model_name = "peteparker456/medical_diagnosis_llama2"

+ # Load the vision and text models
+ vision_model = VisionEncoderDecoderModel.from_pretrained(vision_model_name)
+ vision_tokenizer = AutoTokenizer.from_pretrained(vision_model_name)
+ text_model = pipeline("text-generation", model=text_model_name)

+ pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Path to Tesseract executable

+ # Streamlit UI
+ st.title("Medical Lab Report Analyzer")
+ st.write(
+     "Upload an image or PDF file of a medical lab report to get an interpretation, actionable recommendations, and additional insights."
+ )
+
+ # Upload the image or PDF file
+ uploaded_file = st.file_uploader(
+     "Upload Image or PDF", type=["jpg", "jpeg", "png", "pdf"]
+ )

+ def extract_text_from_image(image: Image.Image) -> str:
      return pytesseract.image_to_string(image)

+ def extract_text_from_pdf(pdf_path: str) -> str:
+     images = convert_from_path(pdf_path)
      text = ""
+     for img in images:
+         text += extract_text_from_image(img)
      return text

+ def generate_insights(text: str) -> List[Tuple[str, str]]:
+     """Get interpretations and recommendations from the text."""
+     # Create a dummy input for the text model
+     inputs = vision_tokenizer.encode(text, return_tensors="pt", max_length=1000, truncation=True)
+     output_text = text_model(text, max_length=1000)[0]["generated_text"]

+     return [
+         ("Report Interpretation", output_text),
+         ("Actionable Recommendations", "Consult your physician for further tests if the values are abnormal."),
+         ("Additional Insights", "Regular check-ups can help monitor and maintain healthy levels.")
+     ]

+ # Process the uploaded file
  if uploaded_file:
      file_type = uploaded_file.type
+     file_name = uploaded_file.name
+     st.write(f"Uploaded File: {file_name}")

      if file_type == "application/pdf":
+         with open("temp.pdf", "wb") as f:
+             f.write(uploaded_file.getvalue())
+         extracted_text = extract_text_from_pdf("temp.pdf")
+         os.remove("temp.pdf")
+     else:  # For image files
+         image = Image.open(io.BytesIO(uploaded_file.getvalue()))
+         extracted_text = extract_text_from_image(image)

      if extracted_text.strip():
+         st.subheader("Extracted Text from Report")
+         st.text_area("Lab Report Text", extracted_text, height=200)

+         # Get lab report interpretation and recommendations
+         st.subheader("Analysis & Insights")
+         insights = generate_insights(extracted_text)

+         for title, insight in insights:
+             st.markdown(f"### {title}")
+             st.write(insight)
      else:
+         st.error("No text found in the uploaded file. Please try another file.")
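
Outside of Streamlit, the extraction-and-generation path introduced by this commit can be exercised from the command line. The sketch below is illustrative only and not part of the commit: it assumes Tesseract and Poppler are installed on the system, uses the placeholder file name sample_report.pdf, and the helper extract_text merely combines the app's two extraction functions.

# Minimal local sketch of the new OCR + text-generation path (not part of the commit).
# Assumes Tesseract and Poppler are installed; "sample_report.pdf" is a placeholder path.
import sys

import pytesseract
from pdf2image import convert_from_path
from PIL import Image
from transformers import pipeline


def extract_text(path: str) -> str:
    """OCR a PDF (page by page) or a single image file into plain text."""
    if path.lower().endswith(".pdf"):
        pages = convert_from_path(path)  # render each PDF page to a PIL image
        return "".join(pytesseract.image_to_string(page) for page in pages)
    return pytesseract.image_to_string(Image.open(path))


if __name__ == "__main__":
    report_path = sys.argv[1] if len(sys.argv) > 1 else "sample_report.pdf"
    text = extract_text(report_path)
    if not text.strip():
        sys.exit("No text could be extracted from the file.")

    # Same text-generation pipeline the app loads at startup.
    generator = pipeline("text-generation", model="peteparker456/medical_diagnosis_llama2")
    print(generator(text, max_length=1000)[0]["generated_text"])

In the app itself, the upload is written to temp.pdf before calling extract_text_from_pdf because convert_from_path expects a path on disk; pdf2image's convert_from_bytes is an alternative when the upload is kept in memory.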