bravewiki committed on
Commit
4fdc44f
·
verified ·
1 Parent(s): f39255d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -34
app.py CHANGED
@@ -1,50 +1,59 @@
1
  import streamlit as st
2
- from transformers import pipeline
3
- from PIL import Image
4
- import pytesseract
5
- import PyPDF2
6
- import pdfplumber
7
  import torch
 
 
 
8
 
9
- # Load the BART model for summarization and NLI
 
 
 
10
  @st.cache_resource
11
- def load_model():
12
- return pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=0 if torch.cuda.is_available() else -1)
 
 
 
13
 
14
- classifier = load_model()
15
 
16
- # OCR for Image using Tesseract
17
  def extract_text_from_image(image):
18
- return pytesseract.image_to_string(image)
 
 
 
19
 
20
- # Extract text from PDF using pdfplumber
21
  def extract_text_from_pdf(pdf_file):
22
  text = ""
23
- with pdfplumber.open(pdf_file) as pdf:
24
- for page in pdf.pages:
25
- text += page.extract_text()
26
  return text
27
 
28
- # Summarize, interpret and give actionable insights
29
  def analyze_report(text):
30
- # Provide a summary
31
- summary = classifier(text, candidate_labels=["summary"], multi_label=False)['labels'][0]
32
-
33
- # Interpretation of results
34
- interpretation = classifier(text, candidate_labels=["interpretation", "normal", "abnormal"], multi_label=True)
35
 
36
- # Recommendations
37
- recommendations = classifier(text, candidate_labels=["follow-up", "Holistic/OTC treatment", "dietary change", "medication"], multi_label=True)
 
 
38
 
39
  return {
40
  "summary": summary,
41
- "interpretation": interpretation['labels'],
42
- "recommendations": recommendations['labels']
43
  }
44
 
45
  # Streamlit UI
46
- st.title("Medical Lab Report Analyzer")
47
- st.write("Upload your medical lab report (PDF/Image) for insights.")
48
 
49
  uploaded_file = st.file_uploader("Choose a PDF/Image file", type=["pdf", "png", "jpg", "jpeg"])
50
 
@@ -57,12 +66,11 @@ if uploaded_file:
57
  extracted_text = extract_text_from_pdf(uploaded_file)
58
  else:
59
  with st.spinner("Extracting text from Image..."):
60
- image = Image.open(uploaded_file)
61
- extracted_text = extract_text_from_image(image)
62
 
63
  # Analyze the extracted text
64
  if extracted_text:
65
- with st.spinner("Analyzing report..."):
66
  result = analyze_report(extracted_text)
67
 
68
  # Display the results
@@ -70,11 +78,9 @@ if uploaded_file:
70
  st.write(result['summary'])
71
 
72
  st.subheader("Interpretation of Results")
73
- for label in result['interpretation']:
74
- st.write(f"- {label.capitalize()}")
75
 
76
  st.subheader("Actionable Recommendations")
77
- for rec in result['recommendations']:
78
- st.write(f"- {rec.capitalize()}")
79
  else:
80
  st.error("No text could be extracted. Please try with a different file.")
 
1
  import streamlit as st
2
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
 
 
 
 
3
  import torch
4
+ import easyocr
5
+ from PIL import Image
6
+ import fitz # PyMuPDF
7
 
8
+ # Load EasyOCR Reader
9
+ reader = easyocr.Reader(['en'])
10
+
11
+ # Load ClinicalBERT Model
12
  @st.cache_resource
13
+ def load_clinicalbert():
14
+ model_name = "emilyalsentzer/Bio_ClinicalBERT"
15
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
16
+ model = AutoModelForSequenceClassification.from_pretrained(model_name)
17
+ return pipeline("text-classification", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
18
 
19
+ clinicalbert_analyzer = load_clinicalbert()
20
 
21
+ # OCR function using EasyOCR
22
  def extract_text_from_image(image):
23
+ image = Image.open(image).convert('RGB')
24
+ result = reader.readtext(image, detail=0)
25
+ extracted_text = " ".join(result)
26
+ return extracted_text
27
 
28
+ # PDF text extraction using PyMuPDF
29
  def extract_text_from_pdf(pdf_file):
30
  text = ""
31
+ with fitz.open(pdf_file) as pdf:
32
+ for page in pdf:
33
+ text += page.get_text()
34
  return text
35
 
36
+ # Analysis function using ClinicalBERT
37
  def analyze_report(text):
38
+ # Define prompts for analysis
39
+ summary_prompt = f"Summarize the following medical report:\n{text}"
40
+ interpretation_prompt = f"Interpret the following lab results:\n{text}"
41
+ recommendation_prompt = f"Provide actionable recommendations based on this medical report:\n{text}"
 
42
 
43
+ # Use ClinicalBERT for text analysis
44
+ summary = clinicalbert_analyzer(summary_prompt)[0]['label']
45
+ interpretation = clinicalbert_analyzer(interpretation_prompt)[0]['label']
46
+ recommendations = clinicalbert_analyzer(recommendation_prompt)[0]['label']
47
 
48
  return {
49
  "summary": summary,
50
+ "interpretation": interpretation,
51
+ "recommendations": recommendations
52
  }
53
 
54
  # Streamlit UI
55
+ st.title("Clinical Lab Report Analyzer")
56
+ st.write("Upload your medical lab report (PDF/Image) to get a summary, interpretation, and actionable recommendations.")
57
 
58
  uploaded_file = st.file_uploader("Choose a PDF/Image file", type=["pdf", "png", "jpg", "jpeg"])
59
 
 
66
  extracted_text = extract_text_from_pdf(uploaded_file)
67
  else:
68
  with st.spinner("Extracting text from Image..."):
69
+ extracted_text = extract_text_from_image(uploaded_file)
 
70
 
71
  # Analyze the extracted text
72
  if extracted_text:
73
+ with st.spinner("Analyzing the medical report using ClinicalBERT..."):
74
  result = analyze_report(extracted_text)
75
 
76
  # Display the results
 
78
  st.write(result['summary'])
79
 
80
  st.subheader("Interpretation of Results")
81
+ st.write(result['interpretation'])
 
82
 
83
  st.subheader("Actionable Recommendations")
84
+ st.write(result['recommendations'])
 
85
  else:
86
  st.error("No text could be extracted. Please try with a different file.")