Spaces: Running on Zero
Thanush committed
Commit · c4447f4
1 Parent(s): 6196bed
Refactor app.py to integrate LangChain memory for conversation tracking and update requirements.txt for LangChain dependency
Files changed:
- app.py +33 -28
- requirements.txt +3 -0
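The new code relies on LangChain's ConversationBufferMemory to carry the conversation across Gradio callbacks: each completed exchange is written with save_context, and the stored messages are later walked to rebuild (user, assistant) pairs. A minimal standalone sketch of that pattern, with purely illustrative example strings, looks like this:

from langchain.memory import ConversationBufferMemory

# Buffer that keeps the raw message objects instead of one flattened string
memory = ConversationBufferMemory(return_messages=True)

# Record one completed (user, assistant) exchange
memory.save_context({"input": "I have a headache"}, {"output": "How long has it lasted?"})

# Walk the stored messages and rebuild (user, assistant) pairs,
# mirroring what generate_response does before building the prompt
pairs = []
pending_user = None
for msg in memory.chat_memory.messages:
    if msg.type == "human":
        pending_user = msg.content
    elif msg.type == "ai" and pending_user is not None:
        pairs.append((pending_user, msg.content))
        pending_user = None

print(pairs)  # [('I have a headache', 'How long has it lasted?')]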
app.py CHANGED
@@ -2,12 +2,13 @@ import gradio as gr
 import spaces
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from langchain.memory import ConversationBufferMemory
 
 # Model configuration
 LLAMA_MODEL = "meta-llama/Llama-2-7b-chat-hf"
 MEDITRON_MODEL = "epfl-llm/meditron-7b"
 
-SYSTEM_PROMPT = """You are a professional virtual doctor. Your goal is to collect detailed information about the user's health condition, symptoms, medical history, medications, lifestyle, and other relevant data.
+SYSTEM_PROMPT = """You are a professional virtual doctor. Your goal is to collect detailed information about the user's Name,age,health condition, symptoms, medical history, medications, lifestyle, and other relevant data.
 Ask 1-2 follow-up questions at a time to gather more details about:
 - Detailed description of symptoms
 - Duration (when did it start?)
@@ -51,9 +52,8 @@ meditron_model = AutoModelForCausalLM.from_pretrained(
 )
 print("Meditron model loaded successfully!")
 
-#
-
-patient_data = {}
+# Initialize LangChain memory
+memory = ConversationBufferMemory(return_messages=True)
 
 def build_llama2_prompt(system_prompt, history, user_input):
     """Format the conversation history and user input for Llama-2 chat models."""
@@ -89,26 +89,31 @@ def get_meditron_suggestions(patient_info):
 @spaces.GPU
 def generate_response(message, history):
     """Generate a response using both models."""
-    #
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # Save the latest user message and last assistant response to memory
+    if history and len(history[-1]) == 2:
+        memory.save_context({"input": history[-1][0]}, {"output": history[-1][1]})
+    memory.save_context({"input": message}, {"output": ""})
+
+    # Build conversation history from memory
+    lc_history = []
+    user_msg = None
+    for msg in memory.chat_memory.messages:
+        if msg.type == "human":
+            user_msg = msg.content
+        elif msg.type == "ai" and user_msg is not None:
+            assistant_msg = msg.content
+            lc_history.append((user_msg, assistant_msg))
+            user_msg = None
+
+    # Build the prompt with LangChain memory history
+    prompt = build_llama2_prompt(SYSTEM_PROMPT, lc_history, message)
+
     # Add summarization instruction after 4 turns
-    if
+    if len(lc_history) >= 4:
         prompt = prompt.replace("[/INST] ", "[/INST] Now summarize what you've learned and suggest when professional care may be needed. ")
-
+
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
+
     # Generate the Llama-2 response
     with torch.no_grad():
         outputs = model.generate(
@@ -120,19 +125,19 @@ def generate_response(message, history):
         do_sample=True,
         pad_token_id=tokenizer.eos_token_id
     )
-
+
     # Decode and extract Llama-2's response
     full_response = tokenizer.decode(outputs[0], skip_special_tokens=False)
     llama_response = full_response.split('[/INST]')[-1].split('</s>')[0].strip()
-
+
     # After 4 turns, add medicine suggestions from Meditron
-    if
+    if len(lc_history) >= 4:
         # Collect full patient conversation
-        full_patient_info = "\n".join(
-
+        full_patient_info = "\n".join([h[0] for h in lc_history] + [message]) + "\n\nSummary: " + llama_response
+
         # Get medicine suggestions
        medicine_suggestions = get_meditron_suggestions(full_patient_info)
-
+
         # Format final response
         final_response = (
             f"{llama_response}\n\n"
@@ -140,7 +145,7 @@ def generate_response(message, history):
             f"{medicine_suggestions}"
         )
         return final_response
-
+
    return llama_response
 
 # Create the Gradio interface
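build_llama2_prompt itself is untouched by this commit, so its body does not appear in the diff; the change only alters which history it receives. For context, a formatter with that signature conventionally assembles the Llama-2 chat template along these lines; this is a sketch of the usual format, not necessarily the file's actual implementation:

def build_llama2_prompt(system_prompt, history, user_input):
    """Assemble a Llama-2 chat prompt from a system prompt, a list of
    (user, assistant) pairs, and the newest user message (illustrative sketch)."""
    prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
    for user_msg, assistant_msg in history:
        prompt += f"{user_msg} [/INST] {assistant_msg} </s><s>[INST] "
    prompt += f"{user_input} [/INST] "
    return prompt

The trailing "[/INST] " marker is what the summarization branch rewrites via prompt.replace and what the decode step splits on to isolate the model's reply.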
requirements.txt CHANGED
@@ -21,3 +21,6 @@ aiofiles>=23.1.0
 
 # For better tensor operations
 numpy>=1.24.0
+
+# For LangChain memory
+langchain>=0.1.0