# multi_Agent_scratch.py
import json
# This would typically be an LLM API call
def call_llm(prompt_text, temperature=0.7, max_tokens=1024):
"""
Simulates an LLM API call. In a real application, this would interact
with a service like Google Gemini, OpenAI GPT, or Anthropic Claude.
"""
print(f"\n--- LLM Call ---")
print(f"Prompt: {prompt_text[:500]}...") # Print a truncated prompt
# Placeholder for actual LLM response logic
# For demonstration, we'll return a predefined structure based on the agent's role.
if "Role: User Intent & Query Interpreter" in prompt_text:
return {
"identified_objects": {
"Cat": {"type": "Sprite", "role": "player_character"},
"Obstacle": {"type": "Sprite", "role": "object_to_avoid"},
"Score": {"type": "Variable", "role": "game_score"}
},
"high_level_behaviors": [
{"entity": "Cat", "action": "jump", "trigger": "spacebar_press"},
{"entity": "Obstacle", "action": "appear_randomly", "frequency": "periodic", "movement": "left_across_screen"},
{"entity": "Score", "action": "increment", "trigger": "cat_jumps_over_obstacle"},
{"entity": "Game", "action": "end", "condition": "Cat_touches_Obstacle"}
],
"constraints": ["game_over_on_collision"]
}
elif "Role: Visual Context Analyzer" in prompt_text:
return {
"visual_elements": {
"cat_image.png": {"inferred_role": "Sprite", "properties": {"costume": "cat_default", "initial_size": "medium"}},
"obstacle_image.png": {"inferred_role": "Sprite", "properties": {"costume": "rock", "initial_size": "small"}},
"background_image.png": {"inferred_role": "Backdrop", "properties": {"background_name": "grassland"}}
},
"inferred_details": ["cat_is_player", "obstacle_is_moving_hazard"]
}
elif "Role: Game Logic & Object Planner" in prompt_text:
return {
"plan_id": "cat_jumping_game_v1",
"sprites": {
"Cat": {
"properties": {"x": -200, "y": -100, "costume": "cat_default"},
"behaviors": [
{"event": "when_key_pressed", "key": "space", "action_sequence": ["change_y_up", "change_y_down"]},
{"event": "when_touching_obstacle", "action_sequence": ["game_over"]}
]
},
"Obstacle": {
"properties": {"x": 250, "y": -100, "costume": "rock", "speed": 5},
"behaviors": [
{"event": "when_green_flag_clicked", "action_sequence": ["hide", "wait_random", "go_to_random_x", "show", "glide_left_until_edge", "hide", "reset_position_and_repeat"]},
]
}
},
"variables": {
"Score": {"initial_value": 0}
},
"backdrops": {
"Stage": {"name": "grassland"}
},
"game_flow": {
"start": "Green Flag Clicked",
"score_mechanic": "Increment Score when Cat jumps over Obstacle", # This is a high-level logic, needs decomposition
"game_over_condition": "Cat touches Obstacle",
"game_over_action": "Stop all scripts, display 'Game Over' message"
}
}
elif "Role: Scratch Block Generator" in prompt_text:
# Example of raw Scratch blocks (simplified JSON for demonstration)
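        # Note: a real Scratch project.json stores blocks in a flat dictionary keyed by
        # block ID, with "opcode", "parent", "next", "inputs", "fields", and "topLevel"
        # entries; the nested structure returned here is a simplified stand-in that only
        # mirrors the parent-child chaining.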
return {
"Cat_Scripts": [
{
"type": "event_whenkeypressed",
"id": "event_1",
"fields": {"KEY_OPTION": ["space"]},
"next": {
"type": "motion_changeyby",
"id": "motion_1_up",
"inputs": {"DY": ["10"]},
"next": {
"type": "control_wait",
"id": "control_1_wait",
"inputs": {"DURATION": ["0.1"]},
"next": {
"type": "motion_changeyby",
"id": "motion_1_down",
"inputs": {"DY": ["-10"]},
"next": None
}
}
}
},
{
"type": "event_whenflagclicked",
"id": "event_2",
"next": {
"type": "control_forever",
"id": "control_2_forever",
"inputs": {
"SUBSTACK": {
"type": "sensing_touchingobject",
"id": "sensing_1",
"fields": {"TOUCHINGOBJECTMENU": ["_mouse_"]}, # Placeholder, should be Obstacle
"next": {
"type": "control_if",
"id": "control_3_if",
"inputs": {
"CONDITION": ["sensing_1"],
"SUBSTACK": {
"type": "control_stop",
"id": "control_4_stop",
"fields": {"STOP_OPTION": ["all"]},
"next": {
"type": "looks_sayforsecs",
"id": "looks_1_say",
"inputs": {"MESSAGE": ["Game Over!"], "SECS": ["2"]},
"next": None
}
}
}
}
}
}
}
}
],
"Obstacle_Scripts": [
# ... (simplified)
]
}
elif "Role: Code Validator & Refinement" in prompt_text:
# Simulate validation result
if "motion_1_up" in prompt_text and "motion_1_down" in prompt_text and "control_forever" in prompt_text:
return {
"validation_status": "PASS",
"errors": [],
"refined_blocks": "Validated Scratch JSON or visual representation"
}
else:
return {
"validation_status": "FAIL",
"errors": [{"type": "LogicalError", "description": "Jump sequence incomplete or missing 'touching obstacle' condition.", "agent_to_reprompt": "Game Logic & Object Planner"}],
"refined_blocks": None
}
return "LLM processing failed or returned unexpected output."
class AIAgent:
def __init__(self, name, role, description):
self.name = name
self.role = role
self.description = description
def generate_prompt(self, context, task_description):
# Basic prompt structure for each agent
prompt = f"Agent Name: {self.name}\n"
prompt += f"Role: {self.role}\n"
prompt += f"Description: {self.description}\n\n"
prompt += f"Context: {json.dumps(context, indent=2)}\n\n"
prompt += f"Task: {task_description}\n\n"
prompt += "Please provide your output in a structured JSON format as specified for your role."
return prompt
def execute(self, context, task_description):
prompt = self.generate_prompt(context, task_description)
response = call_llm(prompt)
return response
class MultiAgentSystem:
def __init__(self):
self.user_intent_agent = AIAgent(
name="UserIntent&QueryInterpreter",
role="User Intent & Query Interpreter",
description="Processes natural language input, extracts explicit and implicit requirements, identifies entities, actions, and constraints. Translates ambiguous human language into structured, machine-interpretable intent."
)
self.visual_context_agent = AIAgent(
name="VisualContextAnalyzer",
role="Visual Context Analyzer",
description="Processes image inputs, identifies objects, infers potential roles (sprites/backdrops), extracts visual properties, and infers missing details based on visual cues."
)
self.game_logic_planner_agent = AIAgent(
name="GameLogic&ObjectPlanner",
role="Game Logic & Object Planner",
description="Synthesizes a comprehensive game plan from structured intent and visual context. Defines objects, properties, and actions, mapping high-level game mechanics to Scratch-compatible concepts. Crucial for inferring logical connections and default behaviors."
)
self.scratch_block_generator_agent = AIAgent(
name="ScratchBlockGenerator",
role="Scratch Block Generator",
description="Translates the detailed game logic plan into specific Scratch code blocks. Accurately selects block categories and types, populates parameters, and arranges them in a valid, executable sequence. STRICTLY adheres to Scratch's block connection rules, parent-child relations, and hierarchical levels."
)
self.code_validator_agent = AIAgent(
name="CodeValidator&Refinement",
role="Code Validator & Refinement",
description="Reviews generated block sequences for logical consistency, completeness, and strict adherence to Scratch's execution model and syntax. Checks for correct block hierarchy, proper connections (parent-child), and identifies errors. Provides structured feedback for refinement."
)
def run_workflow(self, user_query, images=None):
print(f"Starting workflow for query: '{user_query}'")
# Step 1 & 2: User Input, Intent & Visual Analysis (Parallel & Consolidated)
intent_data = self.user_intent_agent.execute(
context={"user_input": user_query},
task_description="Analyze the user query to extract key entities, desired actions, and overarching intent. Classify intent (e.g., 'create a game', 'add feature')."
)
print(f"\nUser Intent: {json.dumps(intent_data, indent=2)}")
visual_data = {}
if images:
visual_data = self.visual_context_agent.execute(
context={"images_provided": images},
task_description="Process provided images to detect objects, infer roles (sprite/backdrop), and extract relevant visual properties. Correlate visual information with textual intent."
)
print(f"\nVisual Context: {json.dumps(visual_data, indent=2)}")
# Consolidate initial context for the planner
initial_context = {
"user_intent": intent_data,
"visual_context": visual_data
}
# Step 3: Game Logic Planning
game_plan = self.game_logic_planner_agent.execute(
context=initial_context,
task_description="Formulate a detailed game plan including sprites, backdrops, variables, properties, and high-level logical behaviors. Infer and complete any missing details based on common game patterns."
)
print(f"\nGame Plan: {json.dumps(game_plan, indent=2)}")
# Step 4: Scratch Block Generation
# Emphasize parent-child and level handling here for the LLM
block_generation_task = (
"Translate the detailed game plan into specific Scratch code blocks. "
"Ensure accurate selection, parameterization, and arrangement of blocks. "
"STRICTLY ADHERE to Scratch's block connection rules, especially for **parent-child relationships** "
"and **top-level vs. lower-level block placement**. "
"Hat blocks must be top-level. Stack blocks form sequential chains (parent-child). "
"C-blocks (e.g., 'if-then', 'repeat') must contain child blocks within their 'C' shape. "
"Boolean and Reporter blocks must fit into their designated slots as child inputs. "
"Output in a JSON format consistent with Scratch's internal representation. "
"Example of parent-child/level: "
" Event block (Hat, top-level) -> "
" Forever block (C-block, child of Event, parent of inner blocks) -> "
" Motion block (Stack, child of Forever) -> "
" Looks block (Stack, child of Motion)"
)
raw_scratch_blocks = self.scratch_block_generator_agent.execute(
context={"game_plan": game_plan},
task_description=block_generation_task
)
print(f"\nRaw Scratch Blocks (JSON): {json.dumps(raw_scratch_blocks, indent=2)}")
# Step 5: Validation & Refinement
validation_report = self.code_validator_agent.execute(
context={"raw_scratch_blocks": raw_scratch_blocks, "original_game_plan": game_plan},
task_description=(
"Perform rigorous static analysis on the generated Scratch blocks. "
"Verify correct block hierarchy (e.g., Hat blocks at top, C-blocks containing children). "
"Ensure proper block connections, validating parent-child relationships and sequential flow. "
"Check for logical consistency, completeness against the game plan, and absence of orphaned/disconnected blocks. "
"Identify potential errors such as uninitialized variables or infinite loops without exit conditions. "
"If errors are found, suggest specific refinements or indicate which upstream agent needs to be re-prompted (e.g., 'Game Logic & Object Planner' for fundamental logic flaws)."
)
)
print(f"\nValidation Report: {json.dumps(validation_report, indent=2)}")
# Iterative refinement loop (simplified for this example)
if validation_report.get("validation_status") == "FAIL":
print("\nValidation failed. Initiating refinement process (simplified).")
# In a real system, this would trigger a more complex re-prompting loop
# based on validation_report.get("agent_to_reprompt")
            # For now, just indicate that refinement is needed (a commented sketch of such
            # a loop follows this method).
return {"status": "Refinement Needed", "details": validation_report["errors"]}
else:
print("\nScratch blocks generated and validated successfully!")
return {"status": "Success", "final_scratch_json": raw_scratch_blocks}
if __name__ == "__main__":
system = MultiAgentSystem()
# Example 1: Simple Cat Jumping Game
user_query_1 = "Make a cat jumping game with score count. The cat jumps when I press spacebar. An obstacle appears and moves across the screen. Score increases when the cat jumps over the obstacle. Game over when cat touches obstacle."
images_1 = ["cat_image.png", "obstacle_image.png", "background_image.png"]
result_1 = system.run_workflow(user_query_1, images_1)
print(f"\n--- Final Result 1 ---")
print(json.dumps(result_1, indent=2))
print("\n" + "="*80 + "\n")
# Example 2: Modified Behavior (demonstrates potential refinement need)
# This example specifically aims to highlight how the validation could catch
# issues if the block generation doesn't handle parent-child correctly.
# The simulated LLM responses will (for now) still "pass" in a simplified manner,
# but the prompt emphasizes the structural requirements.
user_query_2 = "Create a game where a character moves left and right, and collects coins. Display the score."
images_2 = ["character_image.png", "coin_image.png"]
result_2 = system.run_workflow(user_query_2, images_2)
print(f"\n--- Final Result 2 ---")
print(json.dumps(result_2, indent=2))