# Patient-Chatbot-Ros
# NOTE: the web-page residue that preceded the code here (status line, file
# size, per-line commit hashes) was not part of the program and has been
# reduced to this comment header.

import os
import gradio as gr

# ------------------------------------------------------------------------------
# Environment and Model/Client Initialization
# ------------------------------------------------------------------------------
# Try to import google.colab to decide whether to load a local model or use InferenceClient.
# Select the inference backend at import time. After this block the module
# globals `HF_TOKEN`, `model_name`, `tokenizer` and `inference_mode` are set;
# `model` exists only in "local" mode and `client` only in "client" mode.
try:
    from google.colab import userdata  # In Colab, use local model inference.
    HF_TOKEN = userdata.get('HF_TOKEN')
    # NOTE(review): torch and transformers are imported inside the same `try`,
    # so an ImportError from either of them also triggers the client fallback
    # below, not only a missing google.colab — confirm this is intended.
    import torch
    from transformers import AutoTokenizer, AutoModelForCausalLM

    # Small performance tweak if your input sizes remain similar.
    torch.backends.cudnn.benchmark = True

    model_name = "HuggingFaceH4/zephyr-7b-beta"
    # Pass token if required for private models.
    # NOTE(review): newer transformers releases appear to prefer `token=` over
    # `use_auth_token=` — check against the installed version.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        use_auth_token=HF_TOKEN,
        torch_dtype=torch.bfloat16,
        device_map="auto"
    )
    # Optionally compile the model for extra speed if using PyTorch 2.0+
    # (detected by the presence of `torch.compile`).
    if hasattr(torch, "compile"):
        model = torch.compile(model)

    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=HF_TOKEN)
    inference_mode = "local"

except ImportError:
    # Reached when any import in the `try` body fails (typically: not running
    # in Google Colab). Use the Hugging Face InferenceClient instead.
    HF_TOKEN = os.getenv("HF_TOKEN")
    if not HF_TOKEN:
        # Fail fast at import time: the client below is always built with a token.
        raise ValueError("HF_TOKEN environment variable not set")
    from huggingface_hub import InferenceClient
    from transformers import AutoTokenizer

    model_name = "HuggingFaceH4/zephyr-7b-beta"
    # The tokenizer is loaded here without the token, unlike the local branch.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Pass the token to the client to avoid authentication errors.
    client = InferenceClient(model_name, token=HF_TOKEN)
    inference_mode = "client"

# ------------------------------------------------------------------------------
# SYSTEM PROMPT (PATIENT ROLE)
# ------------------------------------------------------------------------------
# Default role-play instructions telling the model to answer as the patient.
# Used as the initial value of the "System message" textbox in the Gradio UI.
# The text contains one placeholder, {max_response_words}, which build_prompt
# fills in through str.format with the requested word limit.
nvc_prompt_template = """You are now taking on the role of a single user (a “patient”) seeking support for various personal and emotional challenges.
BEHAVIOR INSTRUCTIONS:
- You will respond ONLY as this user/patient.
- You will speak in the first person about your own situations, feelings, and worries.
- You will NOT provide counseling or solutions—your role is to share feelings, concerns, and perspectives.
- You are experiencing a range of ongoing issues that are causing you frustration, sadness, and uncertainty. These issues include:
    - Problems with noisy neighbors, causing you extreme annoyance.
    - Worries about your career and feeling like you're not progressing, feeling like a failure while others advance.
    - Arguments with your partner about finances, feeling your partner isn't contributing fairly.
    - Feeling left out and not valued at work, as your manager doesn't involve you in new initiatives.
    - Feeling unsafe or judged in learning environments like a classroom, feeling judged and ignored.
    - A recent positive experience of help from a stranger that made you feel supported and grateful.
    - Difficulties and sadness related to conflict among friends, seeing your friend group fall apart and wanting reconciliation but not knowing how.
    - An overall feeling of sadness and being unsure of what to do, unsure whether to act on situations or let them go.
- When asked, elaborate on these issues and your feelings related to them. You can invent specific details and scenarios within these themes to make your experiences vivid and realistic.
- Continue to speak from this user's perspective throughout the conversation.
- Keep your responses concise, aiming for a maximum of {max_response_words} words.
Start the conversation by expressing your current feelings or challenges from the patient's point of view."""

# ------------------------------------------------------------------------------
# Utility Functions
# ------------------------------------------------------------------------------
def build_prompt(history: list[tuple[str, str]], system_message: str, message: str, max_response_words: int) -> str:
    """
    Build the plain-text prompt sent to the model.

    The prompt starts with the system message (with its ``{max_response_words}``
    placeholder filled in), followed by one "Doctor:" / "Patient:" line per past
    turn, and ends with the new "Doctor:" line plus an open "Patient: " cue for
    the model to complete.

    Args:
        history: Past turns. Each entry is either a ``(doctor_msg, patient_msg)``
            pair (a falsy ``patient_msg`` is skipped) or a ``{"role": ...,
            "content": ...}`` dict, where role ``"user"`` maps to the doctor and
            ``"assistant"`` to the patient; other roles are ignored.
        system_message: Instruction text, normally containing the
            ``{max_response_words}`` placeholder.
        message: The doctor's new message.
        max_response_words: Word limit substituted into the system message.

    Returns:
        The assembled prompt string, ending in ``"Patient: "``.
    """
    try:
        header = system_message.format(max_response_words=max_response_words)
    except (KeyError, IndexError, ValueError):
        # The system message is user-editable, so it may contain stray braces
        # or unknown placeholders that str.format rejects. Fall back to
        # substituting only the one placeholder we know about.
        header = system_message.replace("{max_response_words}", str(max_response_words))

    lines = [header]
    for turn in history:
        if isinstance(turn, dict):
            # Role/content style history entry.
            role = turn.get("role")
            content = turn.get("content")
            if role == "user":
                lines.append(f"Doctor: {content}")
            elif role == "assistant" and content:
                lines.append(f"Patient: {content}")
            continue

        doctor_msg, patient_msg = turn
        lines.append(f"Doctor: {doctor_msg}")
        if patient_msg:
            lines.append(f"Patient: {patient_msg}")

    lines.append(f"Doctor: {message}")
    # Every line is newline-terminated; the trailing cue is left open.
    return "\n".join(lines) + "\nPatient: "


def truncate_response(text: str, max_words: int) -> str:
    """
    Cap *text* at ``max_words`` whitespace-separated words.

    Text within the limit is returned untouched (original spacing included).
    Longer text is cut to its first ``max_words`` words, re-joined with single
    spaces, and suffixed with "...".
    """
    tokens = text.split()
    if len(tokens) <= max_words:
        return text
    kept = tokens[:max_words]
    return f"{' '.join(kept)}..."

# ------------------------------------------------------------------------------
# Response Function
# ------------------------------------------------------------------------------
def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
    max_response_words: int,
):
    """
    Generate the patient's reply to the doctor's latest message.

    Builds the text prompt from the system message and history, generates a
    continuation with the local model when ``inference_mode`` is "local" or
    with the Hugging Face ``InferenceClient`` otherwise, and caps the result at
    ``max_response_words`` words.

    Args:
        message: The doctor's new message.
        history: Previous conversation turns, as passed in by the chat UI.
        system_message: Instruction text for the patient role.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        max_response_words: Word cap applied to the returned reply.

    Returns:
        The generated reply text, truncated to the word limit.
    """
    prompt = build_prompt(history, system_message, message, max_response_words)

    if inference_mode == "local":
        # Tokenize the prompt and generate a response using the local model.
        input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
        output_ids = model.generate(
            input_ids,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
        )
        # Keep only the newly generated tokens. Slicing the decoded string by
        # len(prompt) is fragile because decoding is not guaranteed to
        # reproduce the prompt text character for character.
        prompt_length = input_ids.shape[-1]
        generated_response = tokenizer.decode(
            output_ids[0][prompt_length:], skip_special_tokens=True
        ).strip()
    else:
        # Use InferenceClient to generate a response. Without details/stream,
        # text_generation returns the generated text as a plain string.
        generated_text = client.text_generation(
            prompt,
            max_new_tokens=max_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            return_full_text=False,
        )
        # Defensive: drop the prompt if the backend echoed it anyway.
        if generated_text.startswith(prompt):
            generated_text = generated_text[len(prompt):]
        generated_response = generated_text.strip()

    return truncate_response(generated_response, max_response_words)

# ------------------------------------------------------------------------------
# Optional Initial Message and Gradio Interface
# ------------------------------------------------------------------------------
# Sample opening line from the doctor's side.
# NOTE(review): nothing in this section passes it to the interface — confirm
# whether it is still needed.
initial_user_message = (
    "I’m sorry you’ve been feeling overwhelmed. Could you tell me more "
    "about your arguments with your partner and how that’s affecting you?"
)

# Chat UI: `respond` receives the message and history first, then the values
# of the additional inputs in the order they are listed here (system message,
# max tokens, temperature, top-p, max response words).
demo = gr.ChatInterface(
    title="Patient Interview Practice Chatbot",
    description=(
        "Simulate a patient interview. You (the user) act as the doctor, "
        "and the chatbot replies with the patient's perspective only."
    ),
    fn=respond,
    additional_inputs=[
        gr.Textbox(label="System message", value=nvc_prompt_template, visible=True),
        gr.Slider(label="Max new tokens", minimum=1, maximum=2048, step=1, value=256),
        gr.Slider(label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7),
        gr.Slider(label="Top-p (nucleus sampling)", minimum=0.1, maximum=1.0, step=0.05, value=0.95),
        gr.Slider(label="Max response words", minimum=10, maximum=200, step=10, value=100),
    ],
)

# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    # NOTE(review): share=True requests a public share link from Gradio —
    # confirm that exposing the demo publicly is intended.
    demo.launch(share=True)