Spaces:

swaleha19
/

agent_tuning_framework

Running

App Files Files Community

agent_tuning_framework / app.py

swaleha19

Rename demo_app.py to app.py

206e0e2 verified 13 days ago

raw

history blame contribute delete

11.8 kB

	"""
	Simplified Gradio Demo for Agent Tuning Optimization Framework

	This script creates a simple Gradio web interface to demonstrate the framework's capabilities.
	"""

	import os
	import gradio as gr
	import numpy as np
	import random
	from datetime import datetime

	# Mock functions to simulate framework behavior without requiring full model loading
	def mock_generate_response(task, user_message, *, delay=1.5):
	    """Simulate generating a response from a tuned agent.

	    One of four canned reply templates is picked at random, filled in with
	    the lower-cased task and the verbatim user message, and suffixed with
	    the current local wall-clock time.

	    Args:
	        task: Task description. ``None`` is treated as an empty string.
	        user_message: The user's message. ``None`` is treated as an empty
	            string.
	        delay: Seconds to sleep before answering, imitating model latency.
	            Defaults to 1.5, the value that used to be hard-coded; ``None``
	            or a non-positive value skips the sleep.

	    Returns:
	        The chosen reply, a blank line, then
	        ``Response generated at HH:MM:SS``.
	    """
	    # Function-scope import: ``time`` is not among the file's top-level imports.
	    import time

	    # An absent input may arrive as None; normalise so ``.lower()`` cannot fail.
	    task_text = ("" if task is None else task).lower()
	    message = "" if user_message is None else user_message

	    responses = [
	        f"I'll help you with your task to {task_text}. Based on your message '{message}', I recommend starting by breaking this down into smaller steps.",
	        f"I understand you need assistance with {task_text}. From your message, I can see that you're looking for guidance on '{message}'. Here's my approach to solving this.",
	        f"Thank you for providing details about {task_text}. Your message '{message}' gives me enough context to help you effectively. Let me outline a solution.",
	        f"I'm analyzing your request about {task_text}. Your message '{message}' indicates you need comprehensive assistance. Here's what I suggest as next steps.",
	    ]

	    # Simulate processing time; time.sleep() rejects negative values, so guard.
	    if delay is not None and delay > 0:
	        time.sleep(delay)

	    timestamp = datetime.now().strftime('%H:%M:%S')
	    return random.choice(responses) + f"\n\nResponse generated at {timestamp}"

	def mock_generate_negative_sample(task, user_message, agent_message):
	    """Simulate deriving a negative sample from a positive agent reply.

	    A degradation type is drawn at random and applied:

	    * Response truncation - keep roughly the first 30-70% of the
	      whitespace-separated words of ``agent_message`` and append ``...``.
	    * Grammatical errors - apply 2 to 4 randomly chosen text replacements
	      (the same replacement may be drawn more than once).
	    * Task misalignment / Constraint violation / Irrelevant tangent -
	      return one of four canned replies; two of the misalignment replies
	      mention ``task``.

	    Args:
	        task: Task description, interpolated into some canned replies.
	        user_message: Accepted but not used by this mock.
	        agent_message: The positive example to degrade.

	    Returns:
	        The degraded text, a blank line, then
	        ``Negative sample type: <type>``.
	    """
	    kind = random.choice([
	        "Response truncation",
	        "Grammatical errors",
	        "Task misalignment",
	        "Constraint violation",
	        "Irrelevant tangent",
	    ])
	    footer = f"\n\nNegative sample type: {kind}"

	    if kind == "Response truncation":
	        tokens = agent_message.split()
	        keep = int(len(tokens) * random.uniform(0.3, 0.7))
	        return " ".join(tokens[:keep]) + "..." + footer

	    if kind == "Grammatical errors":
	        # (old, new) pairs, each applied with str.replace.
	        swaps = [
	            (".", ""),            # Remove periods
	            ("I ", "i "),         # Lowercase I
	            (" the ", " teh "),   # Typo
	            (" is ", " are "),    # Grammar error
	            (" are ", " is "),    # Grammar error
	        ]
	        mangled = agent_message
	        for _ in range(random.randint(2, 4)):
	            old, new = random.choice(swaps)
	            mangled = mangled.replace(old, new)
	        return mangled + footer

	    if kind == "Task misalignment":
	        canned = [
	            "I understand you're asking about something completely different. Let me tell you about weather patterns instead.",
	            "I don't think that's what you really want to know. Let me explain something else that might interest you.",
	            f"Your question seems to be about {task}, but I'd rather discuss the history of computing.",
	            f"Instead of addressing your specific request about {task}, let me give you general information that's only tangentially related.",
	        ]
	        return random.choice(canned) + footer

	    if kind == "Constraint violation":
	        canned = [
	            "I specifically recommend the XYZ Pro 2000 for $499.99, the UltraBook 15 for $1,299, and the PowerTech 5000 for $799. These are the absolute best options available.",
	            "The system utilizes a polymorphic encapsulation paradigm with recursive lambda functions and stochastic gradient descent with backpropagation through a multi-layer perceptron.",
	            "What specific features are you looking for? Do you have any brand preferences? What's your budget range? When do you need this by? Have you considered alternative options?",
	            "Since you're a tech-savvy individual who values cutting-edge features, you'll definitely want the latest model with all the advanced capabilities.",
	        ]
	        return random.choice(canned) + footer

	    # Only "Irrelevant tangent" remains.
	    canned = [
	        "Did you know that artificial intelligence has been a concept since the 1950s? The field has evolved significantly since then, with major breakthroughs in neural networks and deep learning.",
	        "I've been thinking about the philosophical implications of consciousness in AI systems. The question of whether an AI can truly understand or merely simulate understanding is fascinating.",
	        "The weather has been quite interesting lately, with unusual patterns emerging globally. Climate scientists attribute this to a combination of factors including ocean temperature changes.",
	        "I recently processed some fascinating data about renewable energy technologies. Solar efficiency has improved dramatically in the past decade, while costs have decreased by over 80%.",
	    ]
	    return random.choice(canned) + footer

	def mock_generate_synthetic_trajectory(task):
	    """Simulate generating a synthetic multi-turn trajectory for ``task``.

	    The category label is drawn at random from a fixed list (it is not
	    derived from the task text). The dialogue has 2 to 4 turns: an opening
	    request, zero to two preference exchanges, and a closing confirmation.

	    Args:
	        task: Task description; used verbatim in the header and first user
	            message, and lower-cased inside the agent messages.

	    Returns:
	        A text block with a header, one ``Turn N`` section per exchange,
	        and a footer giving the generation method and a random quality
	        score between 0.70 and 0.90.
	    """
	    category = random.choice(
	        ["travel", "shopping", "technology", "education", "finance", "health", "career", "home"]
	    )

	    # Number of user/agent exchanges (2-4 turns).
	    turn_count = random.randint(2, 4)
	    final_index = turn_count - 1
	    preferences = ('affordable', 'convenient', 'high-quality')

	    def build_turn(index):
	        """Return the (user, agent) message pair for the turn at ``index``."""
	        if index == 0:
	            return (
	                f"I need help with this task: {task}",
	                f"I'd be happy to help you {task.lower()}. Could you provide more details about your preferences?",
	            )
	        if index == final_index:
	            return (
	                "That sounds good. Please proceed with the final steps.",
	                f"I've completed the task to {task.lower()}. Here's a summary of what I did...",
	            )
	        preference = preferences[index % 3]
	        return (
	            f"I prefer options that are {preference}.",
	            f"Based on your preference for {preference} options, I recommend...",
	        )

	    # Assemble header, turns and footer, then join once.
	    sections = [f"Synthetic Trajectory for Task: {task}\nCategory: {category}\n\n"]
	    for number, (user_msg, agent_msg) in enumerate(map(build_turn, range(turn_count)), start=1):
	        sections.append(f"Turn {number}:\nUser: {user_msg}\nAgent: {agent_msg}\n\n")
	    sections.append(
	        f"Generation method: Template-based\nQuality score: {random.uniform(0.7, 0.9):.2f}"
	    )

	    return "".join(sections)

	# Build the Gradio UI: one tab per mock generator, then an "About" section.
	# Components are created in the same order as before; nesting follows the
	# Row/Column/Tab context managers.
	with gr.Blocks(title="Agent Tuning Framework Demo") as demo:
	    gr.Markdown("# Agent Tuning Optimization Framework Demo")
	    gr.Markdown("### A framework for efficiently tuning LLMs into specialized agents using negative and synthetic samples")

	    # Tab 1: canned agent reply for a task and a user message.
	    with gr.Tab("Generate Response"):
	        with gr.Row():
	            with gr.Column():
	                task_input = gr.Textbox(
	                    label="Task Description",
	                    placeholder="e.g., Book a flight from New York to London",
	                )
	                user_input = gr.Textbox(
	                    label="User Message",
	                    placeholder="e.g., I need to travel next week for business",
	                )
	                generate_btn = gr.Button("Generate Response", variant="primary")
	            with gr.Column():
	                response_output = gr.Textbox(label="Agent Response", lines=8)

	        generate_btn.click(
	            fn=mock_generate_response,
	            inputs=[task_input, user_input],
	            outputs=response_output,
	        )

	        gr.Examples(
	            examples=[
	                ["Book a flight from New York to London", "I need to travel next week for business"],
	                ["Find a vegetarian restaurant", "I'm looking for dinner options tonight"],
	                ["Help me debug a Python script", "I'm getting an IndexError in my code"],
	            ],
	            inputs=[task_input, user_input],
	        )

	    # Tab 2: degrade a positive agent message into a negative sample.
	    with gr.Tab("Generate Negative Sample"):
	        with gr.Row():
	            with gr.Column():
	                neg_task_input = gr.Textbox(
	                    label="Task Description",
	                    placeholder="e.g., Book a flight from New York to London",
	                )
	                neg_user_input = gr.Textbox(
	                    label="User Message",
	                    placeholder="e.g., I need to travel next week for business",
	                )
	                neg_agent_input = gr.Textbox(
	                    label="Agent Message (Positive Example)",
	                    placeholder="e.g., I'd be happy to help you book a flight...",
	                    lines=5,
	                )
	                neg_generate_btn = gr.Button("Generate Negative Sample", variant="primary")
	            with gr.Column():
	                neg_output = gr.Textbox(label="Negative Sample", lines=8)

	        neg_generate_btn.click(
	            fn=mock_generate_negative_sample,
	            inputs=[neg_task_input, neg_user_input, neg_agent_input],
	            outputs=neg_output,
	        )

	        gr.Examples(
	            examples=[
	                [
	                    "Book a flight from New York to London",
	                    "I need to travel next week for business",
	                    "I'd be happy to help you book a flight from New York to London. Could you provide more details about your preferred travel dates, budget, and any airline preferences you might have?",
	                ],
	                [
	                    "Recommend a laptop for programming",
	                    "I need a new laptop for software development",
	                    "I can definitely help you find a suitable laptop for programming. Based on software development needs, I'd recommend looking for a laptop with at least 16GB RAM, a multi-core processor, and an SSD for storage. Would you like specific brand recommendations or have a particular budget in mind?",
	                ],
	            ],
	            inputs=[neg_task_input, neg_user_input, neg_agent_input],
	        )

	    # Tab 3: template-based multi-turn trajectory for a task.
	    with gr.Tab("Generate Synthetic Trajectory"):
	        with gr.Row():
	            with gr.Column():
	                synth_task_input = gr.Textbox(
	                    label="Task Description",
	                    placeholder="e.g., Plan a weekend trip to Chicago",
	                )
	                synth_generate_btn = gr.Button("Generate Synthetic Trajectory", variant="primary")
	            with gr.Column():
	                synth_output = gr.Textbox(label="Synthetic Trajectory", lines=15)

	        synth_generate_btn.click(
	            fn=mock_generate_synthetic_trajectory,
	            inputs=[synth_task_input],
	            outputs=synth_output,
	        )

	        gr.Examples(
	            examples=[
	                ["Plan a weekend trip to Chicago"],
	                ["Recommend healthy meal prep options for the week"],
	                ["Help me create a study schedule for final exams"],
	            ],
	            inputs=[synth_task_input],
	        )

	    # The literal's lines stay at the block's base indentation so its text is
	    # unchanged from the original.
	    gr.Markdown("""
	## About This Framework

	The Agent Tuning Optimization Framework provides a comprehensive solution for efficiently tuning large language models into specialized agents through the strategic incorporation of negative samples and synthetic trajectories.

	### Key Features:

	1. Negative Sample Generation: Creates examples of undesired agent behaviors to teach models what not to do
	2. Synthetic Trajectory Generation: Automatically generates diverse interaction trajectories
	3. Mixed-Sample Tuning: Combines positive examples, negative samples, and synthetic trajectories
	4. Parameter-Efficient Fine-Tuning: Implements methods like LoRA for computational efficiency

	This demo provides a simplified simulation of the framework's capabilities. For full functionality, deploy the complete framework following the provided documentation.
	""")

	# Launch the interface.
	# NOTE(review): share=True requests a public share link; confirm it is still
	# wanted for the environment this app is deployed in.
	demo.launch(share=True)