import gradio as gr
from huggingface_hub import InferenceClient

# -- 1) DEFINE YOUR MODELS HERE --
models = [
    {
        "name": "Tiny Model",
        "description": "A small chat model.",
        "id": "amusktweewt/tiny-model-500M-chat-v2",
        "enabled": True
    },
    {
        "name": "Another Model",
        "description": "A bigger chat model (disabled).",
        "id": "another-model",
        "enabled": False
    }
]
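# Note: entries with "enabled": False still appear in the dropdown, but
# selecting one falls back to the default model (see update_model_id below).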


def get_selected_model_id(evt: gr.SelectData):
    """Helper to extract the model ID from a dropdown selection.

    (Kept as a standalone helper; the UI below wires update_model_id instead.)
    """
    return models[evt.index]["id"] if models[evt.index]["enabled"] else models[0]["id"]


def respond(message, history: list[tuple[str, str]], model_id, system_message, max_tokens, temperature, top_p):
    """
    Builds a chat prompt using a simple template:
    - Optionally includes a system message.
    - Iterates over the conversation history (each exchange is a (user, assistant) tuple).
    - Adds the new user message and appends an empty assistant turn.
    Then it streams the response from the model.
    """
    # -- 2) Instantiate the InferenceClient using the chosen model --
    client = InferenceClient(model_id)

    # Build the messages list.
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    if history:
        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    messages.append({"role": "assistant", "content": ""})

    response_text = ""
    # Stream the response token by token.
    for resp in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some stream chunks can carry a None delta, so guard before concatenating.
        token = resp.choices[0].delta.content or ""
        response_text += token
        yield response_text
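
# A quick way to sanity-check `respond` outside the UI (a minimal sketch: it
# assumes network access to the Hugging Face Inference API and reuses the
# default model and system message defined above):
#
#     for partial in respond("Hello!", [], models[0]["id"],
#                            "You are a friendly Chatbot.", 64, 0.7, 0.95):
#         print(partial)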

# -- 3) BUILD THE UI WITH A PROPER GRADIO DROPDOWN --
with gr.Blocks(css="""
.container {
    max-width: 900px !important;
    margin-left: auto;
    margin-right: auto;
}
#chatbot {
    height: 600px !important;
}
.model-dropdown .gr-dropdown {
    border-radius: 8px;
}
""") as demo:
    with gr.Row():
        with gr.Column(elem_classes="container"):
            # Create a proper Gradio Dropdown that respects the active theme.
            model_choices = [f"{m['name']}: {m['description']}" for m in models]
            model_dropdown = gr.Dropdown(
                choices=model_choices,
                value=model_choices[0],
                label="Select Model",
                elem_classes="model-dropdown",
                scale=3
            )

            # Hidden textbox that stores the current model ID (read by 'respond').
            model_id = gr.Textbox(
                value=models[0]["id"],
                visible=False,
                elem_id="hidden_model"
            )
            # Update the hidden model_id when the dropdown selection changes.
            # The gr.SelectData annotation is what tells Gradio to inject the
            # selection event data into 'evt'.
            def update_model_id(evt: gr.SelectData):
                selected_index = evt.index
                if models[selected_index]["enabled"]:
                    return models[selected_index]["id"]
                else:
                    # If a disabled model is selected, stay with the default.
                    return models[0]["id"]

            model_dropdown.select(
                update_model_id,
                inputs=[],
                outputs=[model_id]
            )
            # System message and parameter controls in a collapsible section.
            with gr.Accordion("Advanced Settings", open=False):
                system_message = gr.Textbox(
                    value="You are a friendly Chatbot.",
                    label="System message"
                )
                with gr.Row():
                    with gr.Column(scale=1):
                        max_tokens = gr.Slider(
                            minimum=1,
                            maximum=2048,
                            value=512,
                            step=1,
                            label="Max new tokens"
                        )
                    with gr.Column(scale=1):
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=4.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature"
                        )
                    with gr.Column(scale=1):
                        top_p = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.95,
                            step=0.05,
                            label="Top-p (nucleus sampling)"
                        )
            # The ChatInterface with a larger chat area and our parameters.
            chat = gr.ChatInterface(
                respond,
                additional_inputs=[
                    model_id,
                    system_message,
                    max_tokens,
                    temperature,
                    top_p,
                ],
                chatbot=gr.Chatbot(elem_id="chatbot", height=600)
            )

if __name__ == "__main__":
    demo.launch()
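
# On Spaces, the bare launch() above is sufficient. For local testing, a
# temporary public URL can be requested instead (an optional variant, not
# part of the deployed app):
#
#     demo.launch(share=True)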