# NOTE(review): page-scrape residue removed here — Hugging Face Spaces status
# header, file-size line, git blame hashes, and the file-viewer line-number
# gutter are viewer chrome, not part of the source file.
import gradio as gr
from openai import OpenAI
import os
# Hugging Face access token, supplied via the HF_TOKEN environment variable
# (None if unset; the API call will then fail with an auth error at request time).
ACCESS_TOKEN = os.getenv("HF_TOKEN")
print("Access token loaded.")
# OpenAI-compatible client pointed at the Hugging Face serverless Inference API.
client = OpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=ACCESS_TOKEN,
)
print("OpenAI client initialized.")
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    frequency_penalty,
    seed,
    custom_model
):
    """Stream a chat completion for *message* given the conversation *history*.

    Yields the accumulated response text after each received token so the
    Gradio ChatInterface can render a growing reply.

    Args:
        message: The latest user message.
        history: Prior turns as (user, assistant) pairs; either side may be
            empty/None and is then skipped.
        system_message: System prompt prepended to the conversation.
        max_tokens, temperature, top_p, frequency_penalty: Sampling controls
            forwarded verbatim to the API.
        seed: Integer seed; the UI sentinel -1 means "random" (sent as None).
        custom_model: Optional model id; blank falls back to the default.
    """
    print(f"Received message: {message}")
    print(f"History: {history}")
    print(f"System message: {system_message}")

    # The API expects None for "no fixed seed"; -1 is only a UI sentinel.
    if seed == -1:
        seed = None

    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # Blank/whitespace-only custom model falls back to the default model id.
    model_to_use = custom_model.strip() or "meta-llama/Llama-3.1-8B-Instruct"

    response = ""
    for message_chunk in client.chat.completions.create(
        model=model_to_use,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        seed=seed,
        messages=messages,
    ):
        token_text = message_chunk.choices[0].delta.content
        # BUG FIX: delta.content is None on role-only and final stream chunks;
        # unguarded `response += token_text` raised TypeError mid-stream.
        if token_text:
            response += token_text
            yield response
# Chat transcript panel rendered by the ChatInterface below.
chatbot = gr.Chatbot(height=600, show_copy_button=True, placeholder="ChatGPT is initializing...", likeable=True, layout="panel")
# NOTE(review): gr.Label is a display-only component; its fixed value string is
# what gets passed to respond() as system_message, so the system prompt is
# effectively this caption text — confirm a Textbox wasn't intended.
system_message_box = gr.Label(value="You can select Max Tokens, Temperature, Top-P, Seed")
# Sampling controls forwarded to respond() as additional inputs.
max_tokens_slider = gr.Slider(1024, 2048, value=1024, step=100, label="Max new tokens")
temperature_slider = gr.Slider(0.1, 1.0, value=0.7, step=0.1, label="Temperature")
top_p_slider = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-P")
frequency_penalty_slider = gr.Slider(-2.0, 2.0, value=0.0, step=0.1, label="Frequency Penalty")
# -1 is the "random seed" sentinel handled inside respond().
seed_slider = gr.Slider(-1, 65535, value=-1, step=1, label="Seed (-1 for random)")
# Free-form model id; respond() falls back to its default when left blank.
custom_model_box = gr.Textbox(value="meta-llama/Llama-3.2-3B-Instruct", label="AI Mode is ")
# Wire the chat UI: respond() receives (message, history) plus the additional
# inputs below, in this exact order — it must match respond()'s signature.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        system_message_box,
        max_tokens_slider,
        temperature_slider,
        top_p_slider,
        frequency_penalty_slider,
        seed_slider,
        custom_model_box,
    ],
    fill_height=True,
    chatbot=chatbot,
    theme="Nymbo/Nymbo_Theme",
)
# Script entry point: start the Gradio server when run directly.
if __name__ == "__main__":
    print("Launching the ChatGPT-Llama...")
    demo.launch()