Spaces:

faisalshah012003
/

health-simplify-tool

Sleeping

App Files Community

faisalshah012003 committed on Jun 4

Commit

10dfafd

verified ·

1 Parent(s): 793f3a3

Update medical_simplifier.py

Browse files

Files changed (1) hide show

medical_simplifier.py +30 -53

medical_simplifier.py CHANGED Viewed

@@ -1,99 +1,76 @@
-import scispacy
 import spacy
-import torch
 import re
 import sys
-from transformers import AutoTokenizer, AutoModelForCausalLM
-from sklearn.metrics.pairwise import cosine_similarity
-import os
-# Set cache directory at module level
-os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface_cache"
-os.environ["HF_HOME"] = "/tmp/huggingface_cache"
 class MedicalTextSimplifier:
     def __init__(self):
         print("Loading models...")
         try:
-            # Load SciSpaCy for term identification
             self.nlp = spacy.load("en_core_sci_sm")
-            # Load BioMedLM (Stanford)
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                "stanford-crfm/BioMedLM",
-                cache_dir="/tmp/huggingface_cache"
-            )
-            self.model = AutoModelForCausalLM.from_pretrained(
-                "stanford-crfm/BioMedLM",
-                cache_dir="/tmp/huggingface_cache"
-            )
             print("Models loaded successfully!")
         except Exception as e:
             print(f"Error loading models: {e}")
-            print("Make sure all required packages are installed.")
             sys.exit(1)
     def identify_medical_terms(self, text):
-        """Identify medical entities using SciSpaCy"""
         doc = self.nlp(text)
         terms = []
         for ent in doc.ents:
-            terms.append({'term': ent.text, 'start': ent.start_char, 'end': ent.end_char})
         return terms
     def generate_simplified_explanation(self, term, context):
-        """Generate a plain-language explanation using BioMedLM"""
         try:
             prompt = f"Explain the medical term '{term}' in simple language for a patient. Context: {context}\nExplanation:"
-            inputs = self.tokenizer.encode(prompt, return_tensors="pt", max_length=512, truncation=True)
-            outputs = self.model.generate(
-                inputs,
-                max_length=100,
-                num_return_sequences=1,
-                temperature=0.7,
-                top_p=0.9,
-                do_sample=True,
-                eos_token_id=self.tokenizer.eos_token_id
-            )
-            explanation = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-            explanation = explanation.split("Explanation:")[-1].strip()
             return explanation
         except Exception as e:
-            print(f"Error generating explanation: {e}")
             return f"a medical term related to {term}"
     def simplify_text(self, text):
         print("\nOriginal text:")
         print(text)
         print("\nIdentifying medical terms using SciSpaCy...")
         medical_terms = self.identify_medical_terms(text)
         if not medical_terms:
             print("No medical terms found.")
             return text
         simplified_text = text
-        offset = 0  # to keep track of text length changes during replacement
-        print("\nMedical terms and explanations:")
         for item in medical_terms:
             term = item['term']
             start = item['start'] + offset
             end = item['end'] + offset
             explanation = self.generate_simplified_explanation(term, text)
             annotated = f"{term} ({explanation})"
-            # Replace the term in text
             simplified_text = simplified_text[:start] + annotated + simplified_text[end:]
             offset += len(annotated) - len(term)
             print(f"\nTerm: {term}")
             print(f"Explanation: {explanation}")
         print("\nSimplified text:")
         print(simplified_text)
         return simplified_text

 import spacy
 import re
 import sys
+from transformers import pipeline
 class MedicalTextSimplifier:
     """Annotate medical terms in free text with plain-language explanations.

     Entities are detected with the spaCy model ``en_core_sci_sm`` and each
     one is explained by a ``text-generation`` pipeline running
     ``stanford-crfm/BioMedLM``. The explanation is inserted in parentheses
     right after the term it describes.
     """

     def __init__(self):
         """Load the entity-recognition model and the text-generation pipeline.

         Any loading failure is printed and terminates the process with
         exit status 1.
         """
         print("Loading models...")
         try:
             # Load SciSpaCy model
             self.nlp = spacy.load("en_core_sci_sm")
             # Load BioMedLM via Hugging Face pipeline
             self.pipe = pipeline("text-generation", model="stanford-crfm/BioMedLM", device=-1)
             print("Models loaded successfully!")
         except Exception as e:
             print(f"Error loading models: {e}")
             sys.exit(1)

     def identify_medical_terms(self, text):
         """Identify biomedical terms using SciSpaCy.

         Args:
             text: The text to scan.

         Returns:
             A list of dicts, one per entity in ``self.nlp(text).ents``, with
             keys ``'term'`` (entity text), ``'start'`` and ``'end'``
             (character offsets into ``text``).
         """
         doc = self.nlp(text)
         return [
             {
                 'term': ent.text,
                 'start': ent.start_char,
                 'end': ent.end_char,
             }
             for ent in doc.ents
         ]

     def generate_simplified_explanation(self, term, context):
         """Generate plain-language explanation using BioMedLM.

         Args:
             term: The medical term to explain.
             context: Surrounding text, embedded in the prompt.

         Returns:
             The generated explanation with surrounding whitespace removed,
             or the generic phrase ``"a medical term related to <term>"``
             when generation fails or produces only whitespace.
         """
         fallback = f"a medical term related to {term}"
         try:
             prompt = f"Explain the medical term '{term}' in simple language for a patient. Context: {context}\nExplanation:"
             # max_new_tokens (not max_length) so the budget applies to the
             # generated continuation only; the prompt already contains the
             # whole context and would otherwise use up a 100-token max_length.
             result = self.pipe(
                 prompt,
                 max_new_tokens=100,
                 do_sample=True,
                 temperature=0.7,
                 top_p=0.9,
                 return_full_text=False,
             )
             generated = result[0]['generated_text']
             # Defensive: drop the prompt if the backend echoed it anyway.
             if generated.startswith(prompt):
                 generated = generated[len(prompt):]
             explanation = generated.strip()
         except Exception as e:
             print(f"Error generating explanation for '{term}': {e}")
             return fallback
         # An empty generation would render as "term ()"; use the fallback.
         return explanation or fallback

     def simplify_text(self, text):
         """Return ``text`` with every detected term followed by an explanation.

         Each term found by ``identify_medical_terms`` is rewritten as
         ``"<term> (<explanation>)"``. Progress and results are printed to
         stdout. If no terms are found, ``text`` is returned unchanged.

         Args:
             text: The text to annotate.

         Returns:
             The annotated text.
         """
         print("\nOriginal text:")
         print(text)
         print("\nIdentifying medical terms using SciSpaCy...")
         medical_terms = self.identify_medical_terms(text)
         if not medical_terms:
             print("No medical terms found.")
             return text
         print("\nMedical terms and simplified explanations:")
         pieces = []
         cursor = 0  # Position in the original text already copied to pieces
         explanations = {}  # One generation per distinct term
         for item in medical_terms:
             term = item['term']
             if term not in explanations:
                 explanations[term] = self.generate_simplified_explanation(term, text)
             explanation = explanations[term]
             # Copy the untouched text before the term, then the annotation.
             pieces.append(text[cursor:item['start']])
             pieces.append(f"{term} ({explanation})")
             cursor = item['end']
             print(f"\nTerm: {term}")
             print(f"Explanation: {explanation}")
         pieces.append(text[cursor:])
         simplified_text = "".join(pieces)
         print("\nSimplified text:")
         print(simplified_text)
         return simplified_text