import gradio as gr
from huggingface_hub import InferenceClient

# -- 1) DEFINE YOUR MODELS HERE --
models = [
    {
        "name": "Tiny Model",
        "description": "A small chat model.",
        "id": "amusktweewt/tiny-model-500M-chat-v2",
        "enabled": True
    },
    {
        "name": "Another Model",
        "description": "A bigger chat model (disabled).",
        "id": "another-model",
        "enabled": False
    }
]
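# Note: entries with "enabled": False still appear in the dropdown, but
# selecting one falls back to the default model (see update_model_id below).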


def get_selected_model_id(evt: gr.SelectData):
    """Helper to extract the model ID from a dropdown selection.

    (Kept as a standalone helper; the UI below wires update_model_id instead.)
    """
    return models[evt.index]["id"] if models[evt.index]["enabled"] else models[0]["id"]


def respond(message, history: list[tuple[str, str]], model_id, system_message, max_tokens, temperature, top_p):
    """
    Builds a chat prompt using a simple template:
    - Optionally includes a system message.
    - Iterates over the conversation history (each exchange is a (user, assistant) tuple).
    - Adds the new user message and appends an empty assistant turn.
    Then it streams the response from the model.
    """
    # -- 2) Instantiate the InferenceClient using the chosen model --
    client = InferenceClient(model_id)

    # Build the messages list.
    messages = []
    if system_message:
        messages.append({"role": "system", "content": system_message})
    if history:
        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    messages.append({"role": "assistant", "content": ""})

    response_text = ""
    # Stream the response token by token.
    for resp in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some stream chunks can carry a None delta, so guard before concatenating.
        token = resp.choices[0].delta.content or ""
        response_text += token
        yield response_text
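
# A quick way to sanity-check `respond` outside the UI (a minimal sketch: it
# assumes network access to the Hugging Face Inference API and reuses the
# default model and system message defined above):
#
#     for partial in respond("Hello!", [], models[0]["id"],
#                            "You are a friendly Chatbot.", 64, 0.7, 0.95):
#         print(partial)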

# -- 3) BUILD THE UI WITH A PROPER GRADIO DROPDOWN --
with gr.Blocks(css="""
.container {
    max-width: 900px !important;
    margin-left: auto;
    margin-right: auto;
}
#chatbot {
    height: 600px !important;
}
.model-dropdown .gr-dropdown {
    border-radius: 8px;
}
""") as demo:
    with gr.Row():
        with gr.Column(elem_classes="container"):
            # Create a proper Gradio Dropdown that respects the active theme.
            model_choices = [f"{m['name']}: {m['description']}" for m in models]
            model_dropdown = gr.Dropdown(
                choices=model_choices,
                value=model_choices[0],
                label="Select Model",
                elem_classes="model-dropdown",
                scale=3
            )

            # Hidden textbox that stores the current model ID (read by 'respond').
            model_id = gr.Textbox(
                value=models[0]["id"],
                visible=False,
                elem_id="hidden_model"
            )
            # Update the hidden model_id when the dropdown selection changes.
            # The gr.SelectData annotation is what tells Gradio to inject the
            # selection event data into 'evt'.
            def update_model_id(evt: gr.SelectData):
                selected_index = evt.index
                if models[selected_index]["enabled"]:
                    return models[selected_index]["id"]
                else:
                    # If a disabled model is selected, stay with the default.
                    return models[0]["id"]

            model_dropdown.select(
                update_model_id,
                inputs=[],
                outputs=[model_id]
            )
            # System message and parameter controls in a collapsible section.
            with gr.Accordion("Advanced Settings", open=False):
                system_message = gr.Textbox(
                    value="You are a friendly Chatbot.",
                    label="System message"
                )
                with gr.Row():
                    with gr.Column(scale=1):
                        max_tokens = gr.Slider(
                            minimum=1,
                            maximum=2048,
                            value=512,
                            step=1,
                            label="Max new tokens"
                        )
                    with gr.Column(scale=1):
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=4.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature"
                        )
                    with gr.Column(scale=1):
                        top_p = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.95,
                            step=0.05,
                            label="Top-p (nucleus sampling)"
                        )
            # The ChatInterface with a larger chat area and our parameters.
            chat = gr.ChatInterface(
                respond,
                additional_inputs=[
                    model_id,
                    system_message,
                    max_tokens,
                    temperature,
                    top_p,
                ],
                chatbot=gr.Chatbot(elem_id="chatbot", height=600)
            )

if __name__ == "__main__":
    demo.launch()
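
# On Spaces, the bare launch() above is sufficient. For local testing, a
# temporary public URL can be requested instead (an optional variant, not
# part of the deployed app):
#
#     demo.launch(share=True)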