# tiny-chat / app.py
import gradio as gr
from huggingface_hub import InferenceClient
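
# NOTE: InferenceClient picks up a Hugging Face token from the HF_TOKEN
# environment variable or a cached `huggingface-cli login`; for gated or
# private models you would pass token=... explicitly.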
# -- 1) DEFINE YOUR MODELS HERE --
models = [
    {
        "name": "Tiny Model",
        "description": "A small chat model.",
        "id": "amusktweewt/tiny-model-500M-chat-v2",
        "enabled": True,
    },
    {
        "name": "Another Model",
        "description": "A bigger chat model (disabled).",
        "id": "another-model",
        "enabled": False,
    },
]
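
# Adding a model only requires appending another dict above. Entries with
# "enabled": False still show up in the dropdown, but selecting one falls
# back to the first (default) model; see get_selected_model_id below.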
def get_selected_model_id(evt: gr.SelectData):
    """Return the model ID for a dropdown selection, falling back to the
    default model when a disabled entry is chosen."""
    selected = models[evt.index]
    return selected["id"] if selected["enabled"] else models[0]["id"]
def respond(message, history: list[tuple[str, str]], model_id, system_message,
            max_tokens, temperature, top_p):
    """
    Builds an OpenAI-style message list:
    - optionally includes a system message,
    - replays the conversation history (each exchange is a (user, assistant) tuple),
    - adds the new user message,
    then streams the model's reply.
    """
    # -- 2) Instantiate the InferenceClient for the chosen model --
    client = InferenceClient(model_id)
    # Build the messages list. No empty assistant turn is appended:
    # chat_completion adds the generation prompt itself.
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    for user_msg, bot_msg in history or []:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
response_text = ""
# Stream the response token-by-token.
for resp in client.chat_completion(
messages,
max_tokens=max_tokens,
stream=True,
temperature=temperature,
top_p=top_p,
):
token = resp.choices[0].delta.content
response_text += token
yield response_text
# -- 3) BUILD THE UI WITH A PROPER GRADIO DROPDOWN --
with gr.Blocks(css="""
.container {
max-width: 900px !important;
margin-left: auto;
margin-right: auto;
}
#chatbot {
height: 600px !important;
}
.model-dropdown .gr-dropdown {
border-radius: 8px;
}
""") as demo:
    with gr.Row():
        with gr.Column(elem_classes="container"):
            # A native Gradio dropdown, so it follows the active theme.
            model_choices = [f"{m['name']}: {m['description']}" for m in models]
            model_dropdown = gr.Dropdown(
                choices=model_choices,
                value=model_choices[0],
                label="Select Model",
                elem_classes="model-dropdown",
                scale=3,
            )
            # Hidden textbox storing the current model ID (read by `respond`).
            model_id = gr.Textbox(
                value=models[0]["id"],
                visible=False,
                elem_id="hidden_model",
            )
            # Update the hidden model_id when the dropdown changes. Gradio only
            # injects the selection event into handlers whose argument is
            # annotated as gr.SelectData; get_selected_model_id (defined above)
            # does this and falls back to the default model for disabled entries.
            model_dropdown.select(
                get_selected_model_id,
                inputs=[],
                outputs=[model_id],
            )
            # System message and parameter controls in a collapsible section.
            with gr.Accordion("Advanced Settings", open=False):
                system_message = gr.Textbox(
                    value="You are a friendly Chatbot.",
                    label="System message",
                )
                with gr.Row():
                    with gr.Column(scale=1):
                        max_tokens = gr.Slider(
                            minimum=1,
                            maximum=2048,
                            value=512,
                            step=1,
                            label="Max new tokens",
                        )
                    with gr.Column(scale=1):
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=4.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature",
                        )
                    with gr.Column(scale=1):
                        top_p = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.95,
                            step=0.05,
                            label="Top-p (nucleus sampling)",
                        )
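            # Temperature rescales the token distribution (lower values are
            # more deterministic); top-p (nucleus) sampling keeps only the
            # smallest set of tokens whose cumulative probability exceeds p.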
            # The chat interface with a larger chat area and our extra inputs.
            chat = gr.ChatInterface(
                respond,
                additional_inputs=[
                    model_id,
                    system_message,
                    max_tokens,
                    temperature,
                    top_p,
                ],
                chatbot=gr.Chatbot(elem_id="chatbot", height=600),
            )
if __name__ == "__main__":
    demo.launch()
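
# A minimal sketch for exercising the endpoint outside the UI (assumes the
# same default model id and that huggingface_hub is installed); handy for
# debugging the inference side on its own:
#
#   from huggingface_hub import InferenceClient
#   client = InferenceClient("amusktweewt/tiny-model-500M-chat-v2")
#   out = client.chat_completion(
#       [{"role": "user", "content": "Hello!"}], max_tokens=64
#   )
#   print(out.choices[0].message.content)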