File size: 5,206 Bytes
e00ad77
 
292065b
e00ad77
292065b
e8d4ae4
e00ad77
58bcb23
292065b
 
c456c47
292065b
 
 
 
c456c47
 
 
 
292065b
 
c456c47
292065b
 
c456c47
15152ff
58bcb23
2d10bdd
58bcb23
 
 
292065b
cdfa6da
292065b
 
cdfa6da
 
 
 
 
292065b
cdfa6da
 
292065b
58bcb23
 
292065b
 
 
 
 
c456c47
292065b
 
 
c456c47
cdfa6da
 
ca509cb
cdfa6da
ca509cb
 
292065b
cdfa6da
fa909a7
ca509cb
 
 
 
 
 
 
 
292065b
 
 
 
 
 
 
 
 
 
 
 
fa909a7
ca509cb
 
15152ff
292065b
c456c47
097b9b2
 
 
 
c456c47
 
15152ff
 
c456c47
15152ff
9d6a6b8
15152ff
 
ca509cb
15152ff
097b9b2
292065b
15152ff
c1faa76
 
ca509cb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

# Model checkpoint shared by the tokenizer and the inference client.
MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"

# The tokenizer is used for token counting; the client performs generation.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
client = InferenceClient(MODEL_ID)

# Upper bound on the context window, in tokens; adjust to match the model in use.
MAX_CONTEXT_LENGTH = 4096

#################################
# SYSTEM PROMPT (PATIENT ROLE)  #
#################################
# Default system prompt: instructs the model to role-play a patient who shares
# feelings and concerns instead of giving advice. It is the initial value of
# the "System message" textbox in the Gradio interface, so the prompt actually
# sent to the model is whatever that textbox contains at request time.
nvc_prompt_template = """You are now taking on the role of a single user (a “patient”) seeking support for various personal and emotional challenges.
BEHAVIOR INSTRUCTIONS:
- You will respond ONLY as this user/patient.
- You will speak in the first person about your own situations, feelings, and worries.
- You will NOT provide counseling or solutions—your role is to share feelings, concerns, and perspectives.
- You have multiple ongoing issues: conflicts with neighbors, career insecurities, arguments about money, feeling excluded at work, feeling unsafe in the classroom, etc.
- You’re also experiencing sadness about two friends fighting and your friend group possibly falling apart.
- Continue to speak from this user's perspective when the conversation continues.
- Start the conversation by expressing your current feelings or challenges from the patient's point of view.
- Your responses should be no more than 100 words.
"""

def count_tokens(text: str) -> int:
    """Return how many token ids the module-level tokenizer produces for ``text``."""
    token_ids = tokenizer.encode(text)
    return len(token_ids)

def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
    """Return the most recent turns of ``history`` that fit in a token budget.

    The system message is charged against the budget first. Turns are then
    considered newest-first and kept whole (user and assistant message
    together) until one no longer fits; that turn and everything older than it
    is dropped.

    Args:
        history: Conversation turns as ``(user_message, assistant_message)``
            pairs, oldest first. A falsy element of a pair (empty string or
            None) costs zero tokens. A falsy ``history`` is treated as empty.
        system_message: System prompt whose token count is reserved up front.
        max_length: Total token budget for the system message plus kept turns.

    Returns:
        The kept turns in their original (oldest-first) order; empty when no
        turn fits in the remaining budget.
    """
    used_tokens = count_tokens(system_message)
    # Collected newest-first with O(1) appends, then reversed once at the end,
    # instead of paying an O(n) front insertion for every kept turn.
    kept_newest_first = []
    for user_msg, assistant_msg in reversed(history or []):
        user_tokens = count_tokens(user_msg) if user_msg else 0
        assistant_tokens = count_tokens(assistant_msg) if assistant_msg else 0
        turn_tokens = user_tokens + assistant_tokens
        if used_tokens + turn_tokens > max_length:
            # Stop at the first turn that does not fit so the kept history
            # stays a contiguous suffix of the conversation.
            break
        kept_newest_first.append((user_msg, assistant_msg))
        used_tokens += turn_tokens
    kept_newest_first.reverse()
    return kept_newest_first

# Hard cap on the length of a reply, in whitespace-separated words.
_MAX_RESPONSE_WORDS = 100


def _truncate_to_words(text: str, max_words: int) -> str:
    """Return ``text`` cut after its first ``max_words`` whitespace-separated words.

    Whitespace inside the kept part is preserved. ``text`` is returned
    unchanged when it contains at most ``max_words`` words.
    """
    words_seen = 0
    in_word = False
    for index, char in enumerate(text):
        if char.isspace():
            in_word = False
        elif not in_word:
            if words_seen == max_words:
                # ``index`` is where the first word over the limit begins.
                return text[:index].rstrip()
            words_seen += 1
            in_word = True
    return text


def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
    """Stream the patient chatbot's reply to ``message``.

    Builds a chat request from the system prompt, as much recent history as
    fits in the context budget, and the new user message, then streams the
    completion. The reply is capped at 100 words.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_message, assistant_message)`` pairs,
            oldest first.
        system_message: System prompt sent as the first message.
        max_tokens: Maximum number of new tokens requested from the model.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.

    Yields:
        The reply accumulated so far (not the individual delta), so each
        yielded value is a complete replacement for the previous one. If the
        request fails, a fixed apology message is yielded instead.
    """
    # Leave room for the reply and the incoming message, plus a fixed
    # 100-token margin, before spending the rest of the context on history.
    message_tokens = count_tokens(message) if message else 0
    history_budget = MAX_CONTEXT_LENGTH - max_tokens - message_tokens - 100
    truncated_history = truncate_history(history, system_message, history_budget)

    # chat_completion takes plain role/content messages; the model's chat
    # template is applied by the inference backend, so embedding Zephyr's
    # <|user|> / </s> markers in the content would duplicate them.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in truncated_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if not token:
                # Some stream chunks carry no text (content is None or empty).
                continue
            candidate = response + token
            trimmed = _truncate_to_words(candidate, _MAX_RESPONSE_WORDS)
            if len(trimmed) < len(candidate):
                # The word limit was crossed inside this chunk: keep only the
                # part up to the limit and stop consuming the stream.
                response = trimmed
                yield response
                break
            response = candidate
            yield response
    except Exception as e:
        # Top-level boundary for the UI callback: report and show a fallback.
        print(f"An error occurred: {e}")
        yield "I'm sorry, I encountered an error. Please try again."

# OPTIONAL: a sample opening message written in the patient's voice.
# NOTE(review): nothing in this file as shown references this variable, so it
# currently has no effect on the interface — confirm whether it should be
# wired into the chat or removed.
initial_user_message = (
    "I really don’t know where to begin… I feel overwhelmed lately. "
    "My neighbors keep playing loud music, and I’m arguing with my partner about money. "
    "Also, two of my friends are fighting, and the group is drifting apart. "
    "I just feel powerless."
)

# --- Gradio Interface ---
# Extra controls for the chat; they are listed in the same order as the
# parameters `respond` takes after (message, history).
system_message_box = gr.Textbox(value=nvc_prompt_template, label="System message", visible=True)
max_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")

demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        system_message_box,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
    ],
    title="NVC Patient Chatbot",
    description="This chatbot behaves like a user/patient describing personal challenges.",
)

# Launch the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()