bravewiki committed on
Commit
28cb500
·
verified ·
1 Parent(s): d9a0d2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -44
app.py CHANGED
@@ -1,59 +1,61 @@
1
  import streamlit as st
2
- from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
3
- import torch
4
- import easyocr
5
  from PIL import Image
6
- import fitz # PyMuPDF
7
-
8
- # Load EasyOCR Reader
9
- reader = easyocr.Reader(['en'])
10
 
11
- # Load ClinicalBERT Model
12
  @st.cache_resource
13
  def load_clinicalbert():
14
- model_name = "emilyalsentzer/Bio_ClinicalBERT"
15
- tokenizer = AutoTokenizer.from_pretrained(model_name)
16
- model = AutoModelForSequenceClassification.from_pretrained(model_name)
17
- return pipeline("text-classification", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
18
 
19
- clinicalbert_analyzer = load_clinicalbert()
 
20
 
21
- # OCR function using EasyOCR
22
  def extract_text_from_image(image):
23
- image = Image.open(image).convert('RGB')
24
- result = reader.readtext(image, detail=0)
25
- extracted_text = " ".join(result)
26
- return extracted_text
27
 
28
- # PDF text extraction using PyMuPDF
29
  def extract_text_from_pdf(pdf_file):
30
  text = ""
31
- with fitz.open(pdf_file) as pdf:
32
- for page in pdf:
33
- text += page.get_text()
34
  return text
35
 
36
- # Analysis function using ClinicalBERT
37
- def analyze_report(text):
38
- # Define prompts for analysis
39
- summary_prompt = f"Summarize the following medical report:\n{text}"
40
- interpretation_prompt = f"Interpret the following lab results:\n{text}"
41
- recommendation_prompt = f"Provide actionable recommendations based on this medical report:\n{text}"
42
 
43
- # Use ClinicalBERT for text analysis
44
- summary = clinicalbert_analyzer(summary_prompt)[0]['label']
45
- interpretation = clinicalbert_analyzer(interpretation_prompt)[0]['label']
46
- recommendations = clinicalbert_analyzer(recommendation_prompt)[0]['label']
 
 
 
 
 
 
 
 
 
47
 
48
  return {
49
  "summary": summary,
50
- "interpretation": interpretation,
51
- "recommendations": recommendations
52
  }
53
 
54
  # Streamlit UI
55
- st.title("Clinical Lab Report Analyzer")
56
- st.write("Upload your medical lab report (PDF/Image) to get a summary, interpretation, and actionable recommendations.")
57
 
58
  uploaded_file = st.file_uploader("Choose a PDF/Image file", type=["pdf", "png", "jpg", "jpeg"])
59
 
@@ -66,21 +68,24 @@ if uploaded_file:
66
  extracted_text = extract_text_from_pdf(uploaded_file)
67
  else:
68
  with st.spinner("Extracting text from Image..."):
69
- extracted_text = extract_text_from_image(uploaded_file)
 
70
 
71
  # Analyze the extracted text
72
- if extracted_text:
73
- with st.spinner("Analyzing the medical report using ClinicalBERT..."):
74
- result = analyze_report(extracted_text)
75
-
76
  # Display the results
77
- st.subheader("Summary")
78
  st.write(result['summary'])
79
 
80
  st.subheader("Interpretation of Results")
81
- st.write(result['interpretation'])
 
82
 
83
  st.subheader("Actionable Recommendations")
84
- st.write(result['recommendations'])
 
85
  else:
86
  st.error("No text could be extracted. Please try with a different file.")
 
1
  import streamlit as st
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 
 
3
  from PIL import Image
4
+ import pytesseract
5
+ import PyPDF2
6
+ import pdfplumber
7
+ import torch
8
 
9
# Load the ClinicalBERT checkpoint once and reuse it across Streamlit reruns
@st.cache_resource
def load_clinicalbert(model_name="emilyalsentzer/Bio_ClinicalBERT"):
    """Build the zero-shot classification pipeline used for report analysis.

    Args:
        model_name: Hugging Face Hub id (or local path) of the checkpoint.
            The same id is used for the tokenizer and the model so the two
            can never drift apart. Defaults to Bio_ClinicalBERT.

    Returns:
        A ``transformers`` ``zero-shot-classification`` pipeline placed on
        GPU 0 when CUDA is available, otherwise on the CPU.
    """
    # NOTE(review): the zero-shot pipeline ranks labels through an NLI
    # (entailment) classification head. The default checkpoint looks like a
    # plain pretrained encoder without such a head -- confirm that the scores
    # are meaningful, or pass an NLI-fine-tuned checkpoint via `model_name`.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)

    # transformers convention: device index 0 is the first GPU, -1 is the CPU.
    device = 0 if torch.cuda.is_available() else -1
    return pipeline(
        "zero-shot-classification",
        model=model,
        tokenizer=tokenizer,
        device=device,
    )
 
15
 
16
+ # Initialize ClinicalBERT classifier
17
+ clinical_bert = load_clinicalbert()
18
 
19
# Image OCR via Tesseract
def extract_text_from_image(image):
    """Run Tesseract OCR on *image* and return the recognised text.

    Args:
        image: Any input accepted by ``pytesseract.image_to_string``; the
            upload handler in this app passes an opened PIL image.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    recognized_text = pytesseract.image_to_string(image)
    return recognized_text
 
 
 
22
 
23
# Extract text from PDF using pdfplumber
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of *pdf_file*, one page per paragraph.

    Args:
        pdf_file: Path or binary file-like object accepted by
            ``pdfplumber.open`` (the app passes the Streamlit upload).

    Returns:
        The page texts joined with a newline. Pages that yield no text
        (``extract_text()`` returned ``None`` or ``""``) are skipped, so a
        PDF without any extractable text still produces ``""``.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # Pages must be read while the document is still open.
        page_texts = [page.extract_text() or "" for page in pdf.pages]

    # Join with a separator: plain concatenation glued the last word of one
    # page onto the first word of the next.
    return "\n".join(page_text for page_text in page_texts if page_text)
30
 
31
# Analyze and interpret the medical report using ClinicalBERT
def _labels_above_threshold(result, score_threshold):
    """Return the labels of a zero-shot result scoring >= score_threshold.

    Falls back to the single top-ranked label when nothing passes, so the
    returned list is never empty for a non-empty result.
    """
    ranked_labels = result['labels']
    confident = [
        label
        for label, score in zip(ranked_labels, result['scores'])
        if score >= score_threshold
    ]
    return confident or ranked_labels[:1]


def analyze_medical_text(text, score_threshold=0.5):
    """Classify a report's text into summary, interpretation and advice labels.

    Args:
        text: Plain text extracted from the uploaded report.
        score_threshold: Minimum per-label score for an interpretation or
            recommendation label to be reported. In multi-label mode every
            candidate is scored independently, so without a cut-off all
            candidates come back (e.g. "normal" next to "critical condition").

    Returns:
        A dict with:
            "summary": the top-ranked label among "summary", "overview" and
                "findings" (a category name, not generated prose);
            "interpretation": list of interpretation labels, best first;
            "recommendations": list of recommendation labels, best first.
    """
    # Single-label pass: keep only the best-scoring category name.
    summary = clinical_bert(
        text,
        candidate_labels=["summary", "overview", "findings"],
        multi_label=False,
    )['labels'][0]

    # Multi-label pass over the possible clinical interpretations.
    interpretation = clinical_bert(
        text,
        candidate_labels=["normal", "abnormal", "urgent", "needs follow-up", "critical condition"],
        multi_label=True,
    )

    # Multi-label pass over the possible follow-up actions.
    recommendations = clinical_bert(
        text,
        candidate_labels=["medication", "dietary change", "exercise", "follow-up with a doctor", "lifestyle change"],
        multi_label=True,
    )

    return {
        "summary": summary,
        "interpretation": _labels_above_threshold(interpretation, score_threshold),
        "recommendations": _labels_above_threshold(recommendations, score_threshold),
    }
55
 
56
  # Streamlit UI
57
+ st.title("Medical Lab Report Analyzer with ClinicalBERT")
58
+ st.write("Upload your medical lab report (PDF/Image) to get a summary and actionable insights using ClinicalBERT.")
59
 
60
  uploaded_file = st.file_uploader("Choose a PDF/Image file", type=["pdf", "png", "jpg", "jpeg"])
61
 
 
68
  extracted_text = extract_text_from_pdf(uploaded_file)
69
  else:
70
  with st.spinner("Extracting text from Image..."):
71
+ image = Image.open(uploaded_file)
72
+ extracted_text = extract_text_from_image(image)
73
 
74
  # Analyze the extracted text
75
+ if extracted_text.strip():
76
+ with st.spinner("Analyzing report using ClinicalBERT..."):
77
+ result = analyze_medical_text(extracted_text)
78
+
79
  # Display the results
80
+ st.subheader("Summary of the Report")
81
  st.write(result['summary'])
82
 
83
  st.subheader("Interpretation of Results")
84
+ for label in result['interpretation']:
85
+ st.write(f"- {label.capitalize()}")
86
 
87
  st.subheader("Actionable Recommendations")
88
+ for rec in result['recommendations']:
89
+ st.write(f"- {rec.capitalize()}")
90
  else:
91
  st.error("No text could be extracted. Please try with a different file.")