import gradio as gr
from openai import OpenAI
import os
import time


def predict(message, history, system_prompt, model, max_tokens, temperature, top_p):
    """Stream a chat completion from the OpenAI API for a Gradio ChatInterface.

    Yields progressively longer partial responses (throttled to at most one
    UI update every 250 ms), then a final message with timing stats appended.

    Args:
        message: The current user message (str).
        history: Prior conversation as a list of {"role", "content"} dicts
            (Gradio `type="messages"` format), or None/empty on first turn.
        system_prompt: System instruction prepended to the conversation.
        model: Model name selected in the UI dropdown.
        max_tokens: Maximum completion tokens; cast to int because Gradio
            sliders can deliver floats and the API requires an integer.
        temperature: Sampling temperature in [0, 1].
        top_p: Nucleus-sampling probability mass in [0, 1].
    """
    # Client is created per call; API key comes from the environment.
    client = OpenAI(api_key=os.environ.get("API_TOKEN"))

    # Assemble the message list: system prompt, prior turns, current message.
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(history or [])
    messages.append({"role": "user", "content": message})

    start_time = time.time()
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=int(max_tokens),  # API rejects float token counts
        temperature=temperature,
        top_p=top_p,
        stop=None,
        stream=True,
    )

    full_message = ""
    first_chunk_time = None  # latency to first content chunk, for the stats line
    last_yield_time = None   # last time we pushed a partial message to the UI
    for chunk in response:
        # Skip keep-alive/empty deltas; only accumulate real content.
        if chunk.choices and chunk.choices[0].delta.content:
            if first_chunk_time is None:
                first_chunk_time = time.time() - start_time
            full_message += chunk.choices[0].delta.content
            current_time = time.time()
            chunk_time = current_time - start_time
            print(f"Message received {chunk_time:.2f} seconds after request: {chunk.choices[0].delta.content}")
            # Throttle UI updates to at most once per 250 ms to avoid
            # flooding the frontend with tiny re-renders.
            if last_yield_time is None or (current_time - last_yield_time >= 0.25):
                yield full_message
                last_yield_time = current_time

    # Final yield: ensures any tail of the stream that arrived inside the
    # throttle window is shown, and appends timing information.
    if full_message:
        total_time = time.time() - start_time
        full_message += f" (First Chunk: {first_chunk_time:.2f}s, Total: {total_time:.2f}s)"
        yield full_message


gr.ChatInterface(
    fn=predict,
    type="messages",
    #save_history=True,
    #editable=True,
    additional_inputs=[
        gr.Textbox("You are a helpful AI assistant.", label="System Prompt"),
        # Explicit default so `model` is never None on the first submit.
        gr.Dropdown(["gpt-4o", "gpt-4o-mini"], value="gpt-4o", label="Model"),
        # step=1 keeps the slider on integer values (max_tokens must be int).
        gr.Slider(800, 4000, value=2000, step=1, label="Max Token"),
        gr.Slider(0, 1, value=0.7, label="Temperature"),
        gr.Slider(0, 1, value=0.95, label="Top P"),
    ],
    css="footer{display:none !important}",
).launch()