Spaces:

techindia2025
/

medbot_2

Running on Zero

App Files Files Community

techindia2025 committed on May 22

Commit

7dd1c93

verified ·

1 Parent(s): b6f3058

Update app.py

Browse files

Files changed (1) hide show

app.py +101 -60

app.py CHANGED Viewed

@@ -1,16 +1,17 @@
-from langchain.chains import ConversationChain, LLMChain
-from langchain.prompts import PromptTemplate
-from langchain.llms import HuggingFacePipeline
-from langchain.memory import ConversationBufferMemory
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-import torch
 import gradio as gr
 # Model configuration
 LLAMA_MODEL = "meta-llama/Llama-2-7b-chat-hf"
 MEDITRON_MODEL = "epfl-llm/meditron-7b"
-# System prompts
 SYSTEM_PROMPT = """You are a professional virtual doctor. Your goal is to collect detailed information about the user's health condition, symptoms, medical history, medications, lifestyle, and other relevant data.
 Ask 1-2 follow-up questions at a time to gather more details about:
 - Detailed description of symptoms
@@ -37,55 +38,61 @@ Patient information: {patient_info}
 <|im_start|>assistant
 """
-print("Loading Llama-2 model...")
-# Create LangChain wrapper for Llama-2
-llama_tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL)
-llama_model = AutoModelForCausalLM.from_pretrained(
-    LLAMA_MODEL,
-    torch_dtype=torch.float16,
-    device_map="auto"
-)
-# Create a pipeline for LangChain
-llama_pipeline = pipeline(
-    "text-generation",
-    model=llama_model,
-    tokenizer=llama_tokenizer,
-    max_new_tokens=512,
-    temperature=0.7,
-    top_p=0.9,
-    do_sample=True
-)
-llama_llm = HuggingFacePipeline(pipeline=llama_pipeline)
-print("Llama-2 model loaded successfully!")
-print("Loading Meditron model...")
-meditron_tokenizer = AutoTokenizer.from_pretrained(MEDITRON_MODEL)
-meditron_model = AutoModelForCausalLM.from_pretrained(
-    MEDITRON_MODEL,
-    torch_dtype=torch.float16,
-    device_map="auto"
-)
-# Create a pipeline for Meditron
-meditron_pipeline = pipeline(
-    "text-generation",
-    model=meditron_model,
-    tokenizer=meditron_tokenizer,
-    max_new_tokens=256,
-    temperature=0.7,
-    top_p=0.9,
-    do_sample=True
-)
-meditron_llm = HuggingFacePipeline(pipeline=meditron_pipeline)
-print("Meditron model loaded successfully!")
 # Create LangChain conversation with memory
 memory = ConversationBufferMemory(return_messages=True)
-conversation = ConversationChain(
-    llm=llama_llm,
-    memory=memory,
-    verbose=True
-)
 # Create a template for the Meditron model
 meditron_template = PromptTemplate(
@@ -98,10 +105,7 @@ meditron_chain = LLMChain(
     verbose=True
 )
-# Track conversation turns
-conversation_turns = 0
-patient_data = []
 def generate_response(message, history):
     global conversation_turns, patient_data
     conversation_turns += 1
@@ -116,16 +120,53 @@ def generate_response(message, history):
     else:
         prompt = f"{SYSTEM_PROMPT}\n\n{message}"
-    # Generate response using LangChain conversation
-    llama_response = conversation.predict(input=prompt)
     # After 4 turns, add medicine suggestions from Meditron
     if conversation_turns >= 4:
         # Collect full patient conversation
         full_patient_info = "\n".join(patient_data) + "\n\nSummary: " + llama_response
-        # Get medicine suggestions using LangChain
-        medicine_suggestions = meditron_chain.run(patient_info=full_patient_info)
         # Format final response
         final_response = (
@@ -151,4 +192,4 @@ demo = gr.ChatInterface(
 )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+import spaces
+from langchain_community.llms import HuggingFacePipeline
+from langchain_core.prompts import PromptTemplate
+from langchain.chains import LLMChain
+from langchain_core.runnables import RunnableWithMessageHistory
+from langchain.memory import ConversationBufferMemory
 # Model configuration
 LLAMA_MODEL = "meta-llama/Llama-2-7b-chat-hf"
 MEDITRON_MODEL = "epfl-llm/meditron-7b"
 SYSTEM_PROMPT = """You are a professional virtual doctor. Your goal is to collect detailed information about the user's health condition, symptoms, medical history, medications, lifestyle, and other relevant data.
 Ask 1-2 follow-up questions at a time to gather more details about:
 - Detailed description of symptoms
 <|im_start|>assistant
 """
+# Track conversation turns
+conversation_turns = 0
+patient_data = []
+# Create a GPU-decorated function for model loading
+@spaces.GPU
+def load_models():
+    print("Loading Llama-2 model...")
+    llama_tokenizer = AutoTokenizer.from_pretrained(LLAMA_MODEL)
+    llama_model = AutoModelForCausalLM.from_pretrained(
+        LLAMA_MODEL,
+        torch_dtype=torch.float16,
+        device_map="auto"
+    )
+    # Create a pipeline for LangChain
+    llama_pipeline = pipeline(
+        "text-generation",
+        model=llama_model,
+        tokenizer=llama_tokenizer,
+        max_new_tokens=512,
+        temperature=0.7,
+        top_p=0.9,
+        do_sample=True
+    )
+    llama_llm = HuggingFacePipeline(pipeline=llama_pipeline)
+    print("Llama-2 model loaded successfully!")
+    print("Loading Meditron model...")
+    meditron_tokenizer = AutoTokenizer.from_pretrained(MEDITRON_MODEL)
+    meditron_model = AutoModelForCausalLM.from_pretrained(
+        MEDITRON_MODEL,
+        torch_dtype=torch.float16,
+        device_map="auto"
+    )
+    # Create a pipeline for Meditron
+    meditron_pipeline = pipeline(
+        "text-generation",
+        model=meditron_model,
+        tokenizer=meditron_tokenizer,
+        max_new_tokens=256,
+        temperature=0.7,
+        top_p=0.9,
+        do_sample=True
+    )
+    meditron_llm = HuggingFacePipeline(pipeline=meditron_pipeline)
+    print("Meditron model loaded successfully!")
+    return llama_llm, meditron_llm, llama_tokenizer, meditron_tokenizer
+# Load models
+llama_llm, meditron_llm, llama_tokenizer, meditron_tokenizer = load_models()
 # Create LangChain conversation with memory
 memory = ConversationBufferMemory(return_messages=True)
 # Create a template for the Meditron model
 meditron_template = PromptTemplate(
     verbose=True
 )
+@spaces.GPU
 def generate_response(message, history):
     global conversation_turns, patient_data
     conversation_turns += 1
     else:
         prompt = f"{SYSTEM_PROMPT}\n\n{message}"
+    # Build the prompt with proper Llama-2 formatting
+    formatted_prompt = f"<s>[INST] <<SYS>>\n{SYSTEM_PROMPT}\n<</SYS>>\n\n"
+    # Add conversation history
+    for user_msg, assistant_msg in history:
+        formatted_prompt += f"{user_msg} [/INST] {assistant_msg} </s><s>[INST] "
+    # Add the current user input
+    formatted_prompt += f"{message} [/INST] "
+    # Generate response using Llama model
+    inputs = llama_tokenizer(formatted_prompt, return_tensors="pt").to("cuda")
+    with torch.no_grad():
+        outputs = llama_llm.pipeline.model.generate(
+            inputs.input_ids,
+            attention_mask=inputs.attention_mask,
+            max_new_tokens=512,
+            temperature=0.7,
+            top_p=0.9,
+            do_sample=True,
+            pad_token_id=llama_tokenizer.eos_token_id
+        )
+    # Decode and extract Llama-2's response
+    full_response = llama_tokenizer.decode(outputs[0], skip_special_tokens=False)
+    llama_response = full_response.split('[/INST]')[-1].split('</s>')[0].strip()
     # After 4 turns, add medicine suggestions from Meditron
     if conversation_turns >= 4:
         # Collect full patient conversation
         full_patient_info = "\n".join(patient_data) + "\n\nSummary: " + llama_response
+        # Get medicine suggestions using Meditron
+        inputs = meditron_tokenizer(MEDITRON_PROMPT.format(patient_info=full_patient_info), return_tensors="pt").to("cuda")
+        with torch.no_grad():
+            outputs = meditron_llm.pipeline.model.generate(
+                inputs.input_ids,
+                attention_mask=inputs.attention_mask,
+                max_new_tokens=256,
+                temperature=0.7,
+                top_p=0.9,
+                do_sample=True
+            )
+        medicine_suggestions = meditron_tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
         # Format final response
         final_response = (
 )
 if __name__ == "__main__":
+    demo.launch()