import gradio as gr
from openai import OpenAI
import os
# Retrieve the access token from the environment variable
ACCESS_TOKEN = os.getenv("HF_TOKEN")
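
# Warn early if the token is missing (this sketch assumes the token lives in
# the HF_TOKEN environment variable, e.g. a Space secret; requests will fail
# with an authentication error without it)
if not ACCESS_TOKEN:
    print("Warning: HF_TOKEN is not set; API requests will fail until it is provided.")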
# Initialize an OpenAI-compatible client pointed at the Hugging Face
# Inference API (the endpoint speaks the OpenAI chat-completions protocol)
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)

def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
):
    # Log the incoming request for debugging
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System Message: {system_message}")
    print(f"Max Tokens: {max_tokens}, Temperature: {temperature}, Top P: {top_p}")
    print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")

    # A seed of -1 means "random": pass None so the API picks one
    if seed == -1:
        seed = None
    # Construct the messages list for the API
    messages = [{"role": "system", "content": system_message}]

    # Add conversation history to the context
    for user_message, assistant_message in history:
        if user_message:
            messages.append({"role": "user", "content": user_message})
            print(f"Added user message: {user_message}")
        if assistant_message:
            messages.append({"role": "assistant", "content": assistant_message})
            print(f"Added assistant message: {assistant_message}")

    # Append the latest message
    messages.append({"role": "user", "content": message})
    # Accumulate the streamed response and yield it incrementally
    response = ""

    # Make the API request with streaming enabled
    for chunk in client.chat.completions.create(
        model="meta-llama/Llama-3.3-70B-Instruct",
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        stream=True,
    ):
        # Streamed chunks carry the new text in .delta, not .message; the
        # delta content can be None (e.g. on the final chunk), so guard it
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response
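
# For comparison, a non-streaming call (a sketch, not used by this app) returns
# the complete text on .message rather than .delta:
#   completion = client.chat.completions.create(
#       model="meta-llama/Llama-3.3-70B-Instruct",
#       messages=messages,
#       stream=False,
#   )
#   text = completion.choices[0].message.content
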
# Create the Gradio Chatbot component
chatbot = gr.Chatbot(height=600)

# Define the Gradio ChatInterface. ChatInterface supplies the message box and
# conversation history itself, so only the extra parameters of respond() are
# declared here via additional_inputs, in the same order as the signature.
# (The initial slider values below are assumptions; adjust as needed.)
demo = gr.ChatInterface(
    fn=respond,
    chatbot=chatbot,
    additional_inputs=[
        gr.Textbox(label="System Message"),
        gr.Slider(minimum=10, maximum=200, value=200, step=1, label="Max Tokens"),
        gr.Slider(minimum=0, maximum=2, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0, maximum=1, value=0.95, step=0.05, label="Top P"),
        gr.Slider(minimum=-2, maximum=2, value=0.0, step=0.1, label="Frequency Penalty"),
        gr.Slider(minimum=-1, maximum=1000000, value=-1, step=1, label="Seed (-1 for random)"),
    ],
    theme="Nymbo/Nymbo_Theme",
)
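
# By default, ChatInterface renders the additional_inputs above in an
# "Additional Inputs" accordion beneath the chat box.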
# Create the "Featured Models" accordion
with gr.Accordion("Featured Models", open=True) as featured_models:
# Textbox for searching models
model_search = gr.Textbox(label="Filter Models")
        # List of featured models
        models = [
            "meta-llama/Llama-3.3-70B-Instruct",
            "meta-llama/Llama-2-70B-Chat-hf",
            "TheBloke/Llama-2-13B-Chat-GGML",
            "TheBloke/Llama-2-70B-Chat-GGML",
            "TheBloke/Llama-2-13B-Chat-GGML-v2",
            "TheBloke/Llama-2-70B-Chat-GGML-v2",
            "TheBloke/Llama-2-70B-Chat-HF-API-compatible-GGML",
            "TheBloke/Llama-2-70b-chat-hf",
            "TheBloke/Llama-2-70B-Chat-GGML-v2-32K",
            "TheBloke/Llama-2-13B-Chat-GGML-v2-32K",
            # Add more models as needed...
        ]
        # Radio buttons for selecting a model
        model_radio = gr.Radio(choices=models, label="Select a Model")

        # Keep only the models whose name contains the search term
        def filter_models(search_term):
            filtered_models = [model for model in models if search_term.lower() in model.lower()]
            return gr.update(choices=filtered_models)

        # Update the model list when the search box is used
        model_search.change(filter_models, inputs=model_search, outputs=model_radio)

    # Create a "Custom Model" textbox
    custom_model = gr.Textbox(label="Custom Model", placeholder="Hugging Face model path")
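
    # Note: in this sketch, the radio selection and custom_model box are not
    # wired into respond(), which always calls meta-llama/Llama-3.3-70B-Instruct;
    # to honor them, pass the components through additional_inputs and read the
    # chosen model inside respond().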
# Create the "Information" tab
with gr.Tab("Information"):
# Featured Models accordion
with gr.Accordion("Featured Models", open=False):
gr.Markdown(
"""
# Featured Models
Here's a list of some popular models available on Hugging Face:
- meta-llama/Llama-3.3-70B-Instruct
- meta-llama/Llama-2-70B-Chat-hf
- TheBloke/Llama-2-13B-Chat-GGML
- TheBloke/Llama-2-70B-Chat-GGML
- TheBloke/Llama-2-13B-Chat-GGML-v2
- TheBloke/Llama-2-70B-Chat-GGML-v2
- ... (and many more)
You can search and select a model from the list above, or use your own custom model path.
"""
)
        # Parameters Overview accordion
        with gr.Accordion("Parameters Overview", open=False):
            gr.Markdown(
                """
                # Parameters Overview
                Here's a brief explanation of the parameters you can adjust:
                - **Max Tokens**: The maximum number of tokens to generate in the response.
                - **Temperature**: Controls the randomness of the output. Higher values make the output more varied; lower values make it more focused and deterministic.
                - **Top P**: Nucleus sampling. The model samples only from the smallest set of tokens whose cumulative probability exceeds P, trading diversity against coherence.
                - **Frequency Penalty**: Penalizes tokens in proportion to how often they have already appeared, discouraging repetition.
                - **Seed**: A fixed seed for reproducible outputs. Use -1 for a random seed.

                Feel free to experiment with these settings to achieve the desired output.
                """
            )
# Launch the Gradio interface
if __name__ == "__main__":
    demo.launch(share=True)
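
# To run locally (a sketch, assuming gradio and openai are installed):
#   export HF_TOKEN=<your Hugging Face token>
#   python app.py
# On Hugging Face Spaces, set HF_TOKEN as a Space secret instead; share=True
# is ignored there.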