Spaces: Running on Zero
Thanush committed
Commit · d6da22c
1 Parent(s): a985489
Enhance prompt building in app.py to include intelligent follow-up questions and adjust response generation logic based on user information turns.
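For context, the change steers the model during information-gathering turns by splicing a canned follow-up question into the Llama-2 chat format in place of the raw user turn. The sketch below is illustrative only and is not code from this commit: SimpleMessage is a hypothetical stand-in for the LangChain message objects app.py pulls from ConversationBufferMemory, and the demo question is taken from the list added in the diff.

from dataclasses import dataclass

@dataclass
class SimpleMessage:
    type: str      # "human" or "ai", matching the msg.type checks in app.py
    content: str

def sketch_prompt(system_prompt, messages, user_input, followup_question=None):
    # Same <s>[INST] <<SYS>> framing the diff builds, reduced to its essentials.
    prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
    for msg in messages:
        if msg.type == "human":
            prompt += f"{msg.content} [/INST] "
        elif msg.type == "ai":
            prompt += f"{msg.content} </s><s>[INST] "
    # During follow-up turns, a canned question replaces the raw user input.
    prompt += f"{followup_question or user_input} [/INST] "
    return prompt

history = [
    SimpleMessage("human", "I've had a headache since yesterday."),
    SimpleMessage("ai", "I'm sorry to hear that. Could you tell me a bit more?"),
]
print(sketch_prompt(
    "You are a helpful medical assistant.",
    history,
    "It gets worse in the evening.",
    followup_question="On a scale of 1-10, how severe are your symptoms?",
))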
app.py
CHANGED
@@ -65,15 +65,28 @@ print("Meditron model loaded successfully!")
 # Initialize LangChain memory
 memory = ConversationBufferMemory(return_messages=True)
 
-def build_llama2_prompt(system_prompt, messages, user_input):
-    """Format the conversation history and user input for Llama-2 chat models, using the full message sequence."""
+def build_llama2_prompt(system_prompt, messages, user_input, followup_stage=None):
     prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
     for msg in messages:
         if msg.type == "human":
             prompt += f"{msg.content} [/INST] "
         elif msg.type == "ai":
             prompt += f"{msg.content} </s><s>[INST] "
-
+    # Add a specific follow-up question if in followup stage
+    if followup_stage is not None:
+        followup_questions = [
+            "Can you describe your main symptoms in detail?",
+            "How long have you been experiencing these symptoms?",
+            "On a scale of 1-10, how severe are your symptoms?",
+            "Have you noticed anything that makes your symptoms better or worse?",
+            "Do you have any other related symptoms, such as fever, fatigue, or shortness of breath?"
+        ]
+        if followup_stage < len(followup_questions):
+            prompt += f"{followup_questions[followup_stage]} [/INST] "
+        else:
+            prompt += f"{user_input} [/INST] "
+    else:
+        prompt += f"{user_input} [/INST] "
     return prompt
 
 def get_meditron_suggestions(patient_info):
@@ -133,14 +146,14 @@ def generate_response(message, history):
         if not re.fullmatch(r".*(name|age|years? old|I'm|I am|my name is).*", msg.content, re.IGNORECASE):
             info_turns += 1
 
-
-
-
-
+    # Ask up to 5 intelligent follow-up questions, then summarize/diagnose
+    if info_turns < 5:
+        prompt = build_llama2_prompt(SYSTEM_PROMPT, messages, message, followup_stage=info_turns)
+    else:
+        prompt = build_llama2_prompt(SYSTEM_PROMPT, messages, message)
+        prompt = prompt.replace("[/INST] ", "[/INST] Now, based on all the information, provide a likely diagnosis (if possible), and suggest when professional care may be needed. ")
 
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
-    # Generate the Llama-2 response
     with torch.no_grad():
         outputs = model.generate(
             inputs.input_ids,
@@ -151,13 +164,11 @@ def generate_response(message, history):
             do_sample=True,
             pad_token_id=tokenizer.eos_token_id
         )
-
-    # Decode and extract Llama-2's response
     full_response = tokenizer.decode(outputs[0], skip_special_tokens=False)
     llama_response = full_response.split('[/INST]')[-1].split('</s>')[0].strip()
 
-    # After
-    if info_turns ==
+    # After 5 info turns, add medicine suggestions from Meditron, but only once
+    if info_turns == 5:
         full_patient_info = "\n".join([
             m.content for m in messages if m.type == "human" and not re.fullmatch(r".*(name|age|years? old|I'm|I am|my name is).*", m.content, re.IGNORECASE)
         ] + [message]) + "\n\nSummary: " + llama_response
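A rough usage sketch of the new turn gating follows. It is an illustration under stated assumptions rather than code from app.py: no model is loaded, the question list mirrors the one added above, and the thresholds simply restate the info_turns checks from the diff.

# Illustrative only: mirrors the info_turns gate added above, without any model calls.
FOLLOWUP_QUESTIONS = [
    "Can you describe your main symptoms in detail?",
    "How long have you been experiencing these symptoms?",
    "On a scale of 1-10, how severe are your symptoms?",
    "Have you noticed anything that makes your symptoms better or worse?",
    "Do you have any other related symptoms, such as fever, fatigue, or shortness of breath?",
]

def next_step(info_turns, max_followups=5):
    """Return what the assistant would do next for a given number of informative turns."""
    if info_turns < max_followups:
        return f"ask follow-up: {FOLLOWUP_QUESTIONS[info_turns]}"
    if info_turns == max_followups:
        return "summarize, give a likely diagnosis, and append Meditron medicine suggestions"
    return "continue the conversation with the diagnosis-style prompt"

for turns in range(7):
    print(f"info_turns={turns}: {next_step(turns)}")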