faisalshah012003 committed on
Commit
10dfafd
·
verified ·
1 Parent(s): 793f3a3

Update medical_simplifier.py

Browse files
Files changed (1) hide show
  1. medical_simplifier.py +30 -53
medical_simplifier.py CHANGED
@@ -1,99 +1,76 @@
1
- import scispacy
2
  import spacy
3
- import torch
4
  import re
5
  import sys
6
- from transformers import AutoTokenizer, AutoModelForCausalLM
7
- from sklearn.metrics.pairwise import cosine_similarity
8
- import os
9
-
10
- # Set cache directory at module level
11
- os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"
12
- os.environ["HF_HOME"] = "/tmp/huggingface_cache"
13
 
class MedicalTextSimplifier:
    """Annotate medical terms in free text with plain-language explanations.

    A SciSpaCy pipeline (``en_core_sci_sm``) finds the entities and the
    BioMedLM causal language model generates a short explanation for each.
    """

    def __init__(self):
        """Load the SciSpaCy model plus the BioMedLM tokenizer and model.

        Progress is printed to stdout. If anything fails to load, the error
        is printed and the process exits with status 1.
        """
        print("Loading models...")
        try:
            # Load SciSpaCy for term identification
            self.nlp = spacy.load("en_core_sci_sm")

            # Load BioMedLM (Stanford)
            self.tokenizer = AutoTokenizer.from_pretrained(
                "stanford-crfm/BioMedLM",
                cache_dir="/tmp/huggingface_cache"
            )
            self.model = AutoModelForCausalLM.from_pretrained(
                "stanford-crfm/BioMedLM",
                cache_dir="/tmp/huggingface_cache"
            )
            print("Models loaded successfully!")
        except Exception as e:
            print(f"Error loading models: {e}")
            print("Make sure all required packages are installed.")
            sys.exit(1)

    def identify_medical_terms(self, text):
        """Identify medical entities using SciSpaCy.

        Args:
            text: The text to scan.

        Returns:
            A list of dicts with keys ``'term'``, ``'start'`` and ``'end'``
            (entity text and its character offsets in ``text``), in the
            order the pipeline reports them.
        """
        doc = self.nlp(text)
        return [
            {'term': ent.text, 'start': ent.start_char, 'end': ent.end_char}
            for ent in doc.ents
        ]

    def generate_simplified_explanation(self, term, context):
        """Generate a plain-language explanation using BioMedLM.

        Args:
            term: The medical term to explain.
            context: Surrounding text that is embedded in the prompt.

        Returns:
            The generated explanation, or the generic phrase
            ``"a medical term related to <term>"`` when generation fails or
            produces nothing but whitespace.
        """
        fallback = f"a medical term related to {term}"
        try:
            prompt = f"Explain the medical term '{term}' in simple language for a patient. Context: {context}\nExplanation:"
            inputs = self.tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)

            # Budget by *new* tokens: max_length would also count the prompt,
            # which embeds the whole context and may be up to 512 tokens long,
            # leaving no room for the explanation itself.
            outputs = self.model.generate(
                inputs,
                max_new_tokens=100,
                num_return_sequences=1,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
                eos_token_id=self.tokenizer.eos_token_id
            )

            decoded = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
            # The decoded sequence echoes the prompt; keep what follows the marker.
            explanation = decoded.split("Explanation:")[-1].strip()
        except Exception as e:
            print(f"Error generating explanation: {e}")
            return fallback
        # Avoid annotating a term with empty parentheses.
        return explanation or fallback

    def simplify_text(self, text):
        """Return ``text`` with each medical term followed by an explanation.

        Every identified term is rewritten as ``"<term> (<explanation>)"``.
        The original text, the terms, their explanations and the final
        result are printed to stdout along the way.

        Args:
            text: The text to simplify.

        Returns:
            The annotated text, or ``text`` unchanged when no terms are found.
        """
        print("\nOriginal text:")
        print(text)

        print("\nIdentifying medical terms using SciSpaCy...")
        medical_terms = self.identify_medical_terms(text)

        if not medical_terms:
            print("No medical terms found.")
            return text

        simplified_text = text
        offset = 0  # to keep track of text length changes during replacement
        explanations = {}  # one model call per distinct term

        print("\nMedical terms and explanations:")
        # NOTE(review): the running offset assumes terms arrive in ascending,
        # non-overlapping order - confirm this holds for the loaded pipeline.
        for item in medical_terms:
            term = item['term']
            start = item['start'] + offset
            end = item['end'] + offset

            if term not in explanations:
                explanations[term] = self.generate_simplified_explanation(term, text)
            explanation = explanations[term]
            annotated = f"{term} ({explanation})"

            # Replace the term in text
            simplified_text = simplified_text[:start] + annotated + simplified_text[end:]
            offset += len(annotated) - len(term)

            print(f"\nTerm: {term}")
            print(f"Explanation: {explanation}")

        print("\nSimplified text:")
        print(simplified_text)
        return simplified_text
 
 
1
  import spacy
 
2
  import re
3
  import sys
4
+ from transformers import pipeline
 
 
 
 
 
 
5
 
class MedicalTextSimplifier:
    """Annotate medical terms in free text with plain-language explanations.

    A SciSpaCy pipeline (``en_core_sci_sm``) finds the biomedical entities
    and a BioMedLM text-generation pipeline writes a short explanation for
    each one.
    """

    def __init__(self):
        """Load the SciSpaCy model and the BioMedLM generation pipeline.

        Progress is printed to stdout. If either model fails to load, the
        error is printed and the process exits with status 1.
        """
        print("Loading models...")
        try:
            # Load SciSpaCy model
            self.nlp = spacy.load("en_core_sci_sm")

            # Load BioMedLM via Hugging Face pipeline
            # (device=-1 is the CPU setting per the transformers pipeline docs)
            self.pipe = pipeline("text-generation", model="stanford-crfm/BioMedLM", device=-1)

            print("Models loaded successfully!")
        except Exception as e:
            print(f"Error loading models: {e}")
            sys.exit(1)

    def identify_medical_terms(self, text):
        """Identify biomedical terms using SciSpaCy.

        Args:
            text: The text to scan.

        Returns:
            A list of dicts with keys ``'term'``, ``'start'`` and ``'end'``
            (entity text and its character offsets in ``text``), in the
            order the pipeline reports them.
        """
        doc = self.nlp(text)
        return [
            {
                'term': ent.text,
                'start': ent.start_char,
                'end': ent.end_char,
            }
            for ent in doc.ents
        ]

    def generate_simplified_explanation(self, term, context):
        """Generate plain-language explanation using BioMedLM.

        Args:
            term: The medical term to explain.
            context: Surrounding text that is embedded in the prompt.

        Returns:
            The generated explanation, or the generic phrase
            ``"a medical term related to <term>"`` when generation fails or
            produces nothing but whitespace.
        """
        fallback = f"a medical term related to {term}"
        try:
            prompt = f"Explain the medical term '{term}' in simple language for a patient. Context: {context}\nExplanation:"
            # Budget by *new* tokens: max_length would also count the prompt,
            # which embeds the whole context and easily exceeds 100 tokens,
            # leaving no room for the explanation itself.
            result = self.pipe(
                prompt,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                return_full_text=False,
            )
            # Splitting on the marker stays as a guard in case the prompt is
            # still echoed back in the generated text.
            explanation = result[0]['generated_text'].split("Explanation:")[-1].strip()
        except Exception as e:
            print(f"Error generating explanation for '{term}': {e}")
            return fallback
        # Avoid annotating a term with empty parentheses.
        return explanation or fallback

    def simplify_text(self, text):
        """Return ``text`` with each medical term followed by an explanation.

        Every identified term is rewritten as ``"<term> (<explanation>)"``.
        The original text, the terms, their explanations and the final
        result are printed to stdout along the way.

        Args:
            text: The text to simplify.

        Returns:
            The annotated text, or ``text`` unchanged when no terms are found.
        """
        print("\nOriginal text:")
        print(text)

        print("\nIdentifying medical terms using SciSpaCy...")
        medical_terms = self.identify_medical_terms(text)

        if not medical_terms:
            print("No medical terms found.")
            return text

        simplified_text = text
        offset = 0  # Adjust positions as we insert explanations
        explanations = {}  # one model call per distinct term

        print("\nMedical terms and simplified explanations:")
        # NOTE(review): the running offset assumes terms arrive in ascending,
        # non-overlapping order - confirm this holds for the loaded pipeline.
        for item in medical_terms:
            term = item['term']
            start = item['start'] + offset
            end = item['end'] + offset

            if term not in explanations:
                explanations[term] = self.generate_simplified_explanation(term, text)
            explanation = explanations[term]
            annotated = f"{term} ({explanation})"

            # Replace in the text
            simplified_text = simplified_text[:start] + annotated + simplified_text[end:]
            offset += len(annotated) - len(term)

            print(f"\nTerm: {term}")
            print(f"Explanation: {explanation}")

        print("\nSimplified text:")
        print(simplified_text)
        return simplified_text