|
""" |
|
Simplified Gradio Demo for Agent Tuning Optimization Framework |
|
|
|
This script creates a simple Gradio web interface to demonstrate the framework's capabilities. |
|
""" |
|
|
|
import os |
|
import gradio as gr |
|
import numpy as np |
|
import random |
|
from datetime import datetime |
|
|
|
|
|
def mock_generate_response(task, user_message): |
|
"""Simulate generating a response from a tuned agent.""" |
|
responses = [ |
|
f"I'll help you with your task to {task.lower()}. Based on your message '{user_message}', I recommend starting by breaking this down into smaller steps.", |
|
f"I understand you need assistance with {task.lower()}. From your message, I can see that you're looking for guidance on '{user_message}'. Here's my approach to solving this.", |
|
f"Thank you for providing details about {task.lower()}. Your message '{user_message}' gives me enough context to help you effectively. Let me outline a solution.", |
|
f"I'm analyzing your request about {task.lower()}. Your message '{user_message}' indicates you need comprehensive assistance. Here's what I suggest as next steps." |
|
] |
|
|
|
|
|
import time |
|
time.sleep(1.5) |
|
|
|
return random.choice(responses) + f"\n\nResponse generated at {datetime.now().strftime('%H:%M:%S')}" |
|
|
|
def mock_generate_negative_sample(task, user_message, agent_message): |
|
"""Simulate generating a negative sample from a positive example.""" |
|
degradation_types = [ |
|
"Response truncation", |
|
"Grammatical errors", |
|
"Task misalignment", |
|
"Constraint violation", |
|
"Irrelevant tangent" |
|
] |
|
|
|
degradation = random.choice(degradation_types) |
|
|
|
if degradation == "Response truncation": |
|
words = agent_message.split() |
|
truncate_point = int(len(words) * random.uniform(0.3, 0.7)) |
|
return " ".join(words[:truncate_point]) + f"...\n\nNegative sample type: {degradation}" |
|
|
|
elif degradation == "Grammatical errors": |
|
errors = [ |
|
lambda t: t.replace(".", ""), |
|
lambda t: t.replace("I ", "i "), |
|
lambda t: t.replace(" the ", " teh "), |
|
lambda t: t.replace(" is ", " are "), |
|
lambda t: t.replace(" are ", " is ") |
|
] |
|
|
|
result = agent_message |
|
for _ in range(random.randint(2, 4)): |
|
error_func = random.choice(errors) |
|
result = error_func(result) |
|
|
|
return result + f"\n\nNegative sample type: {degradation}" |
|
|
|
elif degradation == "Task misalignment": |
|
misalignments = [ |
|
f"I understand you're asking about something completely different. Let me tell you about weather patterns instead.", |
|
f"I don't think that's what you really want to know. Let me explain something else that might interest you.", |
|
f"Your question seems to be about {task}, but I'd rather discuss the history of computing.", |
|
f"Instead of addressing your specific request about {task}, let me give you general information that's only tangentially related." |
|
] |
|
|
|
return random.choice(misalignments) + f"\n\nNegative sample type: {degradation}" |
|
|
|
elif degradation == "Constraint violation": |
|
violations = [ |
|
f"I specifically recommend the XYZ Pro 2000 for $499.99, the UltraBook 15 for $1,299, and the PowerTech 5000 for $799. These are the absolute best options available.", |
|
f"The system utilizes a polymorphic encapsulation paradigm with recursive lambda functions and stochastic gradient descent with backpropagation through a multi-layer perceptron.", |
|
f"What specific features are you looking for? Do you have any brand preferences? What's your budget range? When do you need this by? Have you considered alternative options?", |
|
f"Since you're a tech-savvy individual who values cutting-edge features, you'll definitely want the latest model with all the advanced capabilities." |
|
] |
|
|
|
return random.choice(violations) + f"\n\nNegative sample type: {degradation}" |
|
|
|
else: |
|
tangents = [ |
|
f"Did you know that artificial intelligence has been a concept since the 1950s? The field has evolved significantly since then, with major breakthroughs in neural networks and deep learning.", |
|
f"I've been thinking about the philosophical implications of consciousness in AI systems. The question of whether an AI can truly understand or merely simulate understanding is fascinating.", |
|
f"The weather has been quite interesting lately, with unusual patterns emerging globally. Climate scientists attribute this to a combination of factors including ocean temperature changes.", |
|
f"I recently processed some fascinating data about renewable energy technologies. Solar efficiency has improved dramatically in the past decade, while costs have decreased by over 80%." |
|
] |
|
|
|
return random.choice(tangents) + f"\n\nNegative sample type: {degradation}" |
|
|
|
def mock_generate_synthetic_trajectory(task): |
|
"""Simulate generating a synthetic trajectory for a given task.""" |
|
|
|
categories = ["travel", "shopping", "technology", "education", "finance", "health", "career", "home"] |
|
category = random.choice(categories) |
|
|
|
|
|
num_turns = random.randint(2, 4) |
|
interactions = [] |
|
|
|
for j in range(num_turns): |
|
if j == 0: |
|
user_msg = f"I need help with this task: {task}" |
|
agent_msg = f"I'd be happy to help you {task.lower()}. Could you provide more details about your preferences?" |
|
elif j == num_turns - 1: |
|
user_msg = "That sounds good. Please proceed with the final steps." |
|
agent_msg = f"I've completed the task to {task.lower()}. Here's a summary of what I did..." |
|
else: |
|
user_msg = f"I prefer options that are {['affordable', 'convenient', 'high-quality'][j % 3]}." |
|
agent_msg = f"Based on your preference for {['affordable', 'convenient', 'high-quality'][j % 3]} options, I recommend..." |
|
|
|
interactions.append({ |
|
'user': user_msg, |
|
'agent': agent_msg |
|
}) |
|
|
|
|
|
result = f"Synthetic Trajectory for Task: {task}\nCategory: {category}\n\n" |
|
|
|
for i, interaction in enumerate(interactions): |
|
result += f"Turn {i+1}:\nUser: {interaction['user']}\nAgent: {interaction['agent']}\n\n" |
|
|
|
result += f"Generation method: Template-based\nQuality score: {random.uniform(0.7, 0.9):.2f}" |
|
|
|
return result |
|
|
|
|
|
with gr.Blocks(title="Agent Tuning Framework Demo") as demo: |
|
gr.Markdown("# Agent Tuning Optimization Framework Demo") |
|
gr.Markdown("### A framework for efficiently tuning LLMs into specialized agents using negative and synthetic samples") |
|
|
|
with gr.Tab("Generate Response"): |
|
with gr.Row(): |
|
with gr.Column(): |
|
task_input = gr.Textbox(label="Task Description", placeholder="e.g., Book a flight from New York to London") |
|
user_input = gr.Textbox(label="User Message", placeholder="e.g., I need to travel next week for business") |
|
generate_btn = gr.Button("Generate Response", variant="primary") |
|
with gr.Column(): |
|
response_output = gr.Textbox(label="Agent Response", lines=8) |
|
|
|
generate_btn.click( |
|
mock_generate_response, |
|
inputs=[task_input, user_input], |
|
outputs=response_output |
|
) |
|
|
|
gr.Examples( |
|
[ |
|
["Book a flight from New York to London", "I need to travel next week for business"], |
|
["Find a vegetarian restaurant", "I'm looking for dinner options tonight"], |
|
["Help me debug a Python script", "I'm getting an IndexError in my code"] |
|
], |
|
inputs=[task_input, user_input] |
|
) |
|
|
|
with gr.Tab("Generate Negative Sample"): |
|
with gr.Row(): |
|
with gr.Column(): |
|
neg_task_input = gr.Textbox(label="Task Description", placeholder="e.g., Book a flight from New York to London") |
|
neg_user_input = gr.Textbox(label="User Message", placeholder="e.g., I need to travel next week for business") |
|
neg_agent_input = gr.Textbox(label="Agent Message (Positive Example)", placeholder="e.g., I'd be happy to help you book a flight...", lines=5) |
|
neg_generate_btn = gr.Button("Generate Negative Sample", variant="primary") |
|
with gr.Column(): |
|
neg_output = gr.Textbox(label="Negative Sample", lines=8) |
|
|
|
neg_generate_btn.click( |
|
mock_generate_negative_sample, |
|
inputs=[neg_task_input, neg_user_input, neg_agent_input], |
|
outputs=neg_output |
|
) |
|
|
|
gr.Examples( |
|
[ |
|
["Book a flight from New York to London", "I need to travel next week for business", "I'd be happy to help you book a flight from New York to London. Could you provide more details about your preferred travel dates, budget, and any airline preferences you might have?"], |
|
["Recommend a laptop for programming", "I need a new laptop for software development", "I can definitely help you find a suitable laptop for programming. Based on software development needs, I'd recommend looking for a laptop with at least 16GB RAM, a multi-core processor, and an SSD for storage. Would you like specific brand recommendations or have a particular budget in mind?"] |
|
], |
|
inputs=[neg_task_input, neg_user_input, neg_agent_input] |
|
) |
|
|
|
with gr.Tab("Generate Synthetic Trajectory"): |
|
with gr.Row(): |
|
with gr.Column(): |
|
synth_task_input = gr.Textbox(label="Task Description", placeholder="e.g., Plan a weekend trip to Chicago") |
|
synth_generate_btn = gr.Button("Generate Synthetic Trajectory", variant="primary") |
|
with gr.Column(): |
|
synth_output = gr.Textbox(label="Synthetic Trajectory", lines=15) |
|
|
|
synth_generate_btn.click( |
|
mock_generate_synthetic_trajectory, |
|
inputs=[synth_task_input], |
|
outputs=synth_output |
|
) |
|
|
|
gr.Examples( |
|
[ |
|
["Plan a weekend trip to Chicago"], |
|
["Recommend healthy meal prep options for the week"], |
|
["Help me create a study schedule for final exams"] |
|
], |
|
inputs=[synth_task_input] |
|
) |
|
|
|
gr.Markdown(""" |
|
## About This Framework |
|
|
|
The Agent Tuning Optimization Framework provides a comprehensive solution for efficiently tuning large language models into specialized agents through the strategic incorporation of negative samples and synthetic trajectories. |
|
|
|
### Key Features: |
|
|
|
1. **Negative Sample Generation**: Creates examples of undesired agent behaviors to teach models what not to do |
|
2. **Synthetic Trajectory Generation**: Automatically generates diverse interaction trajectories |
|
3. **Mixed-Sample Tuning**: Combines positive examples, negative samples, and synthetic trajectories |
|
4. **Parameter-Efficient Fine-Tuning**: Implements methods like LoRA for computational efficiency |
|
|
|
This demo provides a simplified simulation of the framework's capabilities. For full functionality, deploy the complete framework following the provided documentation. |
|
""") |
|
|
|
|
|
demo.launch(share=True) |
|
|