Spaces:

techindia2025
/

medbot_2

Running on Zero

App Files Community

techindia2025 committed on May 22

Commit

10736b1

verified ·

1 Parent(s): 1cb1a8e

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -86

app.py CHANGED Viewed

@@ -3,8 +3,8 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import spaces
-# Model configuration - Using only Me-LLaMA 13B-chat
-ME_LLAMA_MODEL = "clinicalnlplab/me-llama-13b-chat"
 # System prompts for different phases
 CONSULTATION_PROMPT = """You are a professional virtual doctor. Your goal is to collect detailed information about the user's health condition, symptoms, medical history, medications, lifestyle, and other relevant data.
@@ -36,6 +36,7 @@ patient_data = []
 def build_me_llama_prompt(system_prompt, history, user_input):
     """Format the conversation for Me-LLaMA chat model."""
     prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
     # Add conversation history
@@ -53,15 +54,30 @@ def load_model_if_needed():
     global me_llama_model, me_llama_tokenizer
     if me_llama_model is None:
-        print("Loading Me-LLaMA 13B-chat model...")
-        me_llama_tokenizer = AutoTokenizer.from_pretrained(ME_LLAMA_MODEL)
-        me_llama_model = AutoModelForCausalLM.from_pretrained(
-            ME_LLAMA_MODEL,
-            torch_dtype=torch.float16,
-            device_map="auto",
-            trust_remote_code=True
-        )
-        print("Me-LLaMA 13B-chat model loaded successfully!")
 @spaces.GPU
 def generate_medicine_suggestions(patient_info):
@@ -96,91 +112,95 @@ def generate_response(message, history):
     """Generate response using only Me-LLaMA for both consultation and medicine suggestions."""
     global conversation_turns, patient_data
-    # Load model if needed
-    load_model_if_needed()
-    # Track conversation turns
-    conversation_turns += 1
-    # Store patient data
-    patient_data.append(message)
-    # Phase 1-3: Information gathering
-    if conversation_turns < 4:
-        # Build consultation prompt
-        prompt = build_me_llama_prompt(CONSULTATION_PROMPT, history, message)
-        inputs = me_llama_tokenizer(prompt, return_tensors="pt")
-        # Move inputs to the same device as the model
-        if torch.cuda.is_available():
-            inputs = {k: v.to(me_llama_model.device) for k, v in inputs.items()}
-        # Generate consultation response
-        with torch.no_grad():
-            outputs = me_llama_model.generate(
-                inputs["input_ids"],
-                attention_mask=inputs["attention_mask"],
-                max_new_tokens=400,
-                temperature=0.7,
-                top_p=0.9,
-                do_sample=True,
-                pad_token_id=me_llama_tokenizer.eos_token_id
-            )
-        # Decode response
-        full_response = me_llama_tokenizer.decode(outputs[0], skip_special_tokens=False)
-        response = full_response.split('[/INST]')[-1].split('</s>')[0].strip()
-        return response
-    # Phase 4+: Summary and medicine suggestions
-    else:
-        # First, get summary from consultation
-        summary_prompt = build_me_llama_prompt(
-            CONSULTATION_PROMPT + "\n\nNow summarize what you've learned and suggest when professional care may be needed.",
-            history,
-            message
-        )
-        inputs = me_llama_tokenizer(summary_prompt, return_tensors="pt")
-        if torch.cuda.is_available():
-            inputs = {k: v.to(me_llama_model.device) for k, v in inputs.items()}
-        # Generate summary
-        with torch.no_grad():
-            outputs = me_llama_model.generate(
-                inputs["input_ids"],
-                attention_mask=inputs["attention_mask"],
-                max_new_tokens=400,
-                temperature=0.7,
-                top_p=0.9,
-                do_sample=True,
-                pad_token_id=me_llama_tokenizer.eos_token_id
             )
-        summary_response = me_llama_tokenizer.decode(outputs[0], skip_special_tokens=False)
-        summary = summary_response.split('[/INST]')[-1].split('</s>')[0].strip()
-        # Then get medicine suggestions using the same model
-        full_patient_info = "\n".join(patient_data) + f"\n\nMedical Summary: {summary}"
-        medicine_suggestions = generate_medicine_suggestions(full_patient_info)
-        # Combine both responses
-        final_response = (
-            f"**MEDICAL SUMMARY:**\n{summary}\n\n"
-            f"**MEDICATION AND HOME CARE SUGGESTIONS:**\n{medicine_suggestions}\n\n"
-            f"**DISCLAIMER:** This is AI-generated advice for informational purposes only. Please consult a licensed healthcare provider for proper medical diagnosis and treatment."
-        )
-        return final_response
 # Create the Gradio interface
 demo = gr.ChatInterface(
     fn=generate_response,
     title="🏥 Complete Medical Assistant - Me-LLaMA 13B",
-    description="Comprehensive medical consultation powered by Me-LLaMA 13B-chat. One model handles both consultation and medicine suggestions. Tell me about your symptoms!",
     examples=[
         "I have a persistent cough and sore throat for 3 days",
         "I've been having severe headaches and feel dizzy",

 from transformers import AutoModelForCausalLM, AutoTokenizer
 import spaces
+# Model configuration - Using correct Me-LLaMA model identifier
+ME_LLAMA_MODEL = "clinicalnlplab/me-llama-13b"  # Corrected model name
 # System prompts for different phases
 CONSULTATION_PROMPT = """You are a professional virtual doctor. Your goal is to collect detailed information about the user's health condition, symptoms, medical history, medications, lifestyle, and other relevant data.
 def build_me_llama_prompt(system_prompt, history, user_input):
     """Format the conversation for Me-LLaMA chat model."""
+    # Use standard Llama-2 chat format since Me-LLaMA is based on Llama-2
     prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
     # Add conversation history
     global me_llama_model, me_llama_tokenizer
     if me_llama_model is None:
+        print("Loading Me-LLaMA 13B model...")
+        try:
+            me_llama_tokenizer = AutoTokenizer.from_pretrained(
+                ME_LLAMA_MODEL,
+                trust_remote_code=True
+            )
+            me_llama_model = AutoModelForCausalLM.from_pretrained(
+                ME_LLAMA_MODEL,
+                torch_dtype=torch.float16,
+                device_map="auto",
+                trust_remote_code=True
+            )
+            print("Me-LLaMA 13B model loaded successfully!")
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            # Fall back to the general-purpose Llama-2 7B chat model (not medical-specific)
+            print("Falling back to Llama-2-7b-chat-hf...")
+            me_llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
+            me_llama_model = AutoModelForCausalLM.from_pretrained(
+                "meta-llama/Llama-2-7b-chat-hf",
+                torch_dtype=torch.float16,
+                device_map="auto"
+            )
+            print("Fallback model loaded successfully!")
 @spaces.GPU
 def generate_medicine_suggestions(patient_info):
     """Generate response using only Me-LLaMA for both consultation and medicine suggestions."""
     global conversation_turns, patient_data
+    try:
+        # Load model if needed
+        load_model_if_needed()
+        # Track conversation turns
+        conversation_turns += 1
+        # Store patient data
+        patient_data.append(message)
+        # Phase 1-3: Information gathering
+        if conversation_turns < 4:
+            # Build consultation prompt
+            prompt = build_me_llama_prompt(CONSULTATION_PROMPT, history, message)
+            inputs = me_llama_tokenizer(prompt, return_tensors="pt")
+            # Move inputs to the same device as the model
+            if torch.cuda.is_available():
+                inputs = {k: v.to(me_llama_model.device) for k, v in inputs.items()}
+            # Generate consultation response
+            with torch.no_grad():
+                outputs = me_llama_model.generate(
+                    inputs["input_ids"],
+                    attention_mask=inputs["attention_mask"],
+                    max_new_tokens=400,
+                    temperature=0.7,
+                    top_p=0.9,
+                    do_sample=True,
+                    pad_token_id=me_llama_tokenizer.eos_token_id
+                )
+            # Decode response
+            full_response = me_llama_tokenizer.decode(outputs[0], skip_special_tokens=False)
+            response = full_response.split('[/INST]')[-1].split('</s>')[0].strip()
+            return response
+        # Phase 4+: Summary and medicine suggestions
+        else:
+            # First, get summary from consultation
+            summary_prompt = build_me_llama_prompt(
+                CONSULTATION_PROMPT + "\n\nNow summarize what you've learned and suggest when professional care may be needed.",
+                history,
+                message
             )
+            inputs = me_llama_tokenizer(summary_prompt, return_tensors="pt")
+            if torch.cuda.is_available():
+                inputs = {k: v.to(me_llama_model.device) for k, v in inputs.items()}
+            # Generate summary
+            with torch.no_grad():
+                outputs = me_llama_model.generate(
+                    inputs["input_ids"],
+                    attention_mask=inputs["attention_mask"],
+                    max_new_tokens=400,
+                    temperature=0.7,
+                    top_p=0.9,
+                    do_sample=True,
+                    pad_token_id=me_llama_tokenizer.eos_token_id
+                )
+            summary_response = me_llama_tokenizer.decode(outputs[0], skip_special_tokens=False)
+            summary = summary_response.split('[/INST]')[-1].split('</s>')[0].strip()
+            # Then get medicine suggestions using the same model
+            full_patient_info = "\n".join(patient_data) + f"\n\nMedical Summary: {summary}"
+            medicine_suggestions = generate_medicine_suggestions(full_patient_info)
+            # Combine both responses
+            final_response = (
+                f"**MEDICAL SUMMARY:**\n{summary}\n\n"
+                f"**MEDICATION AND HOME CARE SUGGESTIONS:**\n{medicine_suggestions}\n\n"
+                f"**DISCLAIMER:** This is AI-generated advice for informational purposes only. Please consult a licensed healthcare provider for proper medical diagnosis and treatment."
+            )
+            return final_response
+    except Exception as e:
+        return f"I apologize, but I'm experiencing technical difficulties. Please try again. Error: {str(e)}"
 # Create the Gradio interface
 demo = gr.ChatInterface(
     fn=generate_response,
     title="🏥 Complete Medical Assistant - Me-LLaMA 13B",
+    description="Comprehensive medical consultation powered by Me-LLaMA 13B. One model handles both consultation and medicine suggestions. Tell me about your symptoms!",
     examples=[
         "I have a persistent cough and sore throat for 3 days",
         "I've been having severe headaches and feel dizzy",