import gradio as gr
from huggingface_hub import InferenceClient

# -- 1) DEFINE YOUR MODELS HERE --
models = [
    {
        "name": "Tiny Model 500M Chat v2",
        "description": "Original model with a context length of 1024 and single turn capabilities",
        "id": "amusktweewt/tiny-model-500M-chat-v2",
        "enabled": True
    },
    {
        "name": "New Model",
        "description": "(Disabled)",
        "id": "another-model",
        "enabled": False
    }
]
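# Entries with "enabled": False still appear in the dropdown below but are rendered
# as non-selectable options; to activate one, point "id" at a real Hub repo and set
# "enabled" to True.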


def respond(message, history: list[tuple[str, str]], model_id, system_message, max_tokens, temperature, top_p):
    """
    Builds a chat prompt using a simple template:
      - Optionally includes a system message.
      - Iterates over conversation history (each exchange as a tuple of (user, assistant)).
      - Adds the new user message and appends an empty assistant turn.
    Then it streams the response from the model.
    """
    # -- 2) Instantiate the InferenceClient using the chosen model --
    client = InferenceClient(model_id)

    # Build the messages list.
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})

    if history:
        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})

    messages.append({"role": "user", "content": message})
    messages.append({"role": "assistant", "content": ""})

    response_text = ""
    # Stream the response token-by-token.
    for resp in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
    ):
        token = resp.choices[0].delta.content or ""  # delta.content can be None on some chunks
        response_text += token
        yield response_text
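
# For a one-off, non-streaming call outside the UI, the same client can be used
# directly (sketch; assumes the model id is reachable through the Inference API):
#
#   client = InferenceClient("amusktweewt/tiny-model-500M-chat-v2")
#   result = client.chat_completion(
#       [{"role": "user", "content": "Hello!"}],
#       max_tokens=64,
#   )
#   print(result.choices[0].message.content)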


# Gradio's Dropdown component has no per-option "disabled" state, so we render a
# native HTML <select> via gr.HTML and mirror its value into a hidden gr.Textbox
# with a small inline JavaScript handler.

# -- 3) BUILD THE UI WITH CUSTOM DROPDOWN --
with gr.Blocks(css="""
    .container {
        max-width: 900px !important;
        margin-left: auto;
        margin-right: auto;
    }
    #chatbot {
        height: 600px !important;
    }
    /* CSS for disabling dropdown options */
    .disabled-option {
        color: #999 !important;
        background-color: #f0f0f0 !important;
        pointer-events: none !important;
    }
    /* Dark mode support */
    @media (prefers-color-scheme: dark) {
        .disabled-option {
            color: #666 !important;
            background-color: #333 !important;
        }
    }
""") as demo:
    with gr.Row():
        with gr.Column(elem_classes="container"):
            # Create custom HTML dropdown with properly disabled options
            dropdown_options = ""
            for model in models:
                value = model["id"]
                label = f"{model['name']}: {model['description']}"
                disabled = "" if model["enabled"] else 'disabled class="disabled-option"'
                dropdown_options += f'<option value="{value}" {disabled}>{label}</option>'

            dropdown_html = f"""
            <div style="margin-bottom: 20px;">
              <label for="model_select" style="display: block; margin-bottom: 8px; font-weight: bold;">Select Model:</label>
              <select id="model_select" style="width: 100%; padding: 8px; border-radius: 8px;
                     border: 1px solid var(--border-color, #ccc); background-color: var(--background-fill-primary);"
                     onchange="const tb = document.querySelector('#hidden_model_id textarea');
                               tb.value = this.value;
                               tb.dispatchEvent(new Event('input', {{ bubbles: true }}));">
                {dropdown_options}
              </select>
            </div>
            """

            gr.HTML(value=dropdown_html)

            # Hidden textbox to store the current model ID (will be read by 'respond')
            model_id = gr.Textbox(
                value=models[0]["id"],
                visible=False,
                elem_id="hidden_model_id"
            )

            # System message and parameter controls in a collapsible section
            with gr.Accordion("Advanced Settings", open=False):
                system_message = gr.Textbox(
                    value="You are a friendly Chatbot.",
                    label="System message"
                )

                with gr.Row():
                    with gr.Column(scale=1):
                        max_tokens = gr.Slider(
                            minimum=1,
                            maximum=2048,
                            value=512,
                            step=1,
                            label="Max new tokens"
                        )

                    with gr.Column(scale=1):
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=4.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature"
                        )

                    with gr.Column(scale=1):
                        top_p = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.95,
                            step=0.05,
                            label="Top-p (nucleus sampling)"
                        )

            # The ChatInterface with a larger chat area and our parameters
            chat = gr.ChatInterface(
                respond,
                additional_inputs=[
                    model_id,
                    system_message,
                    max_tokens,
                    temperature,
                    top_p,
                ],
                chatbot=gr.Chatbot(elem_id="chatbot", height=600)
            )

if __name__ == "__main__":
    demo.launch()
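
# Local usage sketch (assumptions: gradio and huggingface_hub are installed, and the
# selected model is reachable through the Hugging Face Inference API; gated models
# require a token, e.g. via `huggingface-cli login`):
#
#   pip install gradio huggingface_hub
#   python app.py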