# MCP-Powered Culinary Voice Assistant
# Hugging Face Space Implementation
import sqlite3
import threading

import gradio as gr
import numpy as np
from mcp.server.fastmcp import FastMCP

from agents import Agent
from agents.mcp import MCPServerSse, MCPServerStdio
from agents.voice import (
    AudioInput,
    SingleAgentVoiceWorkflow,
    TTSModelSettings,
    VoicePipeline,
    VoicePipelineConfig,
)

# ------ Custom MCP Cooking Tools Server ------
mcp = FastMCP("Culinary Tools Server")


@mcp.tool()
def get_recipe_by_ingredients(ingredients: list) -> dict:
    """Find recipes based on available ingredients"""
    print(f"[Culinary Server] Finding recipes with: {', '.join(ingredients)}")
    # In a real implementation, this would call a recipe API
    return {
        "recipes": [
            {"name": "Vegetable Stir Fry", "time": 20, "difficulty": "Easy"},
            {"name": "Pasta Primavera", "time": 30, "difficulty": "Medium"},
        ]
    }


@mcp.tool()
def get_recipe_image(recipe_name: str) -> str:
    """Generate an image of the finished recipe"""
    print(f"[Culinary Server] Generating image for: {recipe_name}")
    # This would call DALL-E or Stable Diffusion in production
    return "https://example.com/recipe-image.jpg"


@mcp.tool()
def convert_measurements(amount: float, from_unit: str, to_unit: str) -> dict:
    """Convert cooking measurements between units"""
    print(f"[Culinary Server] Converting {amount} {from_unit} to {to_unit}")
    # Simple conversion logic - a real implementation would handle more units
    conversions = {
        ("tbsp", "tsp"): lambda x: x * 3,
        ("cups", "ml"): lambda x: x * 240,
        ("oz", "g"): lambda x: x * 28.35,
    }
    conversion_key = (from_unit.lower(), to_unit.lower())
    if conversion_key in conversions:
        return {"result": conversions[conversion_key](amount), "unit": to_unit}
    return {"error": "Conversion not supported"}


# ------ Recipe Database (SQLite) ------
def init_recipe_db():
    # Use an on-disk file so the separate mcp-server-sqlite process launched later
    # can open the same database; an in-memory DB is not visible across processes.
    conn = sqlite3.connect("recipes.db")
    c = conn.cursor()
    c.execute(
        """CREATE TABLE IF NOT EXISTS recipes
           (id INTEGER PRIMARY KEY, name TEXT, ingredients TEXT,
            instructions TEXT, prep_time INT)"""
    )
    # Seed sample recipes only if the table is empty
    if c.execute("SELECT COUNT(*) FROM recipes").fetchone()[0] == 0:
        recipes = [
            ("Classic Pancakes",
             "['flour', 'eggs', 'milk', 'baking powder']",
             "1. Mix dry ingredients\n2. Add wet ingredients\n3. Cook on griddle",
             15),
            ("Tomato Soup",
             "['tomatoes', 'onion', 'garlic', 'vegetable stock']",
             "1. Sauté onions\n2. Add tomatoes\n3. Simmer and blend",
             30),
        ]
        c.executemany(
            "INSERT INTO recipes (name, ingredients, instructions, prep_time) VALUES (?,?,?,?)",
            recipes,
        )
        conn.commit()
    return conn


# ------ Voice Assistant Setup ------
def create_culinary_agent(mcp_servers):
    """Create the culinary assistant agent"""
    culinary_agent = Agent(
        name="ChefAssistant",
        instructions="""
        You are a professional chef assistant. Help users with cooking tasks:
        1. Use get_recipe_by_ingredients when users have specific ingredients
        2. Query the recipe database (SQLite MCP server) for known recipes
        3. Use convert_measurements for unit conversions
        4. Use get_recipe_image when the user asks to see a dish
        5. Keep responses concise and practical for kitchen use
        6. Use a warm, encouraging tone suitable for cooking
        """,
        mcp_servers=mcp_servers,
        model="gpt-4.1-mini",
    )
    return culinary_agent
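

# A minimal sketch of how a real (non-mocked) reply could be assembled: collect the voice
# pipeline's streamed TTS chunks into a waveform that gr.Audio can play. The event type
# name ("voice_stream_event_audio") and the 24 kHz output rate are assumptions based on the
# OpenAI Agents SDK streaming API; this helper is illustrative and not wired into the demo
# handler below.
async def collect_pipeline_audio(result) -> tuple[int, np.ndarray]:
    """Gather streamed audio events from a VoicePipeline result into (sample_rate, waveform)."""
    chunks = []
    async for event in result.stream():
        if event.type == "voice_stream_event_audio" and event.data is not None:
            chunks.append(event.data)
    waveform = np.concatenate(chunks) if chunks else np.zeros(1, dtype=np.int16)
    return 24000, waveform  # assumption: the pipeline's TTS output is 24 kHz PCM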


# ------ Gradio Interface ------
async def process_voice_command(audio, state):
    """Process a voice command through the agent system"""
    if audio is None:
        return None, "Please record a command first.", None, state

    sr, audio_data = audio
    # Normalize integer PCM samples to float32 in [-1, 1]
    audio_array = (audio_data / np.iinfo(audio_data.dtype).max).astype(np.float32)

    # Initialize on first run
    if state is None:
        state = {
            "db": init_recipe_db(),  # keep the connection alive for the session
            "mcp_servers": [],
            "agent": None,
        }

    # Start MCP servers (async context managers in the Agents SDK)
    async with MCPServerSse(
        name="Culinary Tools",
        params={"url": "http://localhost:8000/sse"},
        client_session_timeout_seconds=15,
    ) as culinary_server:
        async with MCPServerStdio(
            params={
                "command": "uvx",
                "args": ["mcp-server-sqlite", "--db-path", "recipes.db"],
            },
        ) as db_server:
            state["mcp_servers"] = [culinary_server, db_server]
            state["agent"] = create_culinary_agent(state["mcp_servers"])

            # Run the audio through the agent's voice pipeline
            pipeline = VoicePipeline(
                workflow=SingleAgentVoiceWorkflow(state["agent"]),
                config=VoicePipelineConfig(
                    tts_settings=TTSModelSettings(
                        instructions="Warm, encouraging chef voice"
                    )
                ),
            )
            audio_input = AudioInput(buffer=audio_array, frame_rate=sr)
            result = await pipeline.run(audio_input)
            # In production, turn `result` into playable audio (see collect_pipeline_audio above)

    # For demo purposes, return a mock response
    return (
        "https://example.com/response.wav",
        "I found 2 recipes for your ingredients! Vegetable Stir Fry (20 mins) and Pasta Primavera (30 mins).",
        "https://example.com/stir-fry.jpg",
        state,
    )


# ------ Hugging Face Space UI ------
with gr.Blocks(title="MCP Culinary Voice Assistant") as demo:
    state = gr.State(value=None)

    with gr.Row():
        gr.Markdown("# 🧑‍🍳 MCP-Powered Culinary Voice Assistant")

    with gr.Row():
        audio_input = gr.Audio(sources=["microphone"], type="numpy", label="Speak to Chef Assistant")
        audio_output = gr.Audio(label="Assistant Response", interactive=False)

    with gr.Row():
        text_output = gr.Textbox(label="Transcription", interactive=False)
        image_output = gr.Image(label="Recipe Image", interactive=False)

    with gr.Row():
        submit_btn = gr.Button("Process Command", variant="primary")

    submit_btn.click(
        fn=process_voice_command,
        inputs=[audio_input, state],
        outputs=[audio_output, text_output, image_output, state],
    )

    gr.Examples(
        examples=[
            ["What can I make with eggs and flour?"],
            ["Show me how tomato soup looks"],
            ["Convert 2 cups to milliliters"],
        ],
        inputs=[text_output],
        label="Example Queries",
    )

if __name__ == "__main__":
    # Start the custom MCP tools server (SSE transport) in a background thread
    server_thread = threading.Thread(target=mcp.run, kwargs={"transport": "sse"})
    server_thread.daemon = True
    server_thread.start()

    # Launch the Gradio interface
    demo.launch(server_name="0.0.0.0", server_port=7860)
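
# Quick sanity check without any MCP transport (assumes FastMCP's @mcp.tool() leaves the
# decorated functions directly callable as plain Python), e.g. in a REPL:
#   convert_measurements(2.0, "cups", "ml")      # -> {"result": 480.0, "unit": "ml"}
#   get_recipe_by_ingredients(["eggs", "flour"])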