# MCP-Powered Culinary Voice Assistant
# Hugging Face Space Implementation
import sqlite3

import gradio as gr
import numpy as np
from mcp.server.fastmcp import FastMCP

from agents import Agent
from agents.mcp import MCPServerSse, MCPServerStdio
from agents.voice import (
    AudioInput,
    SingleAgentVoiceWorkflow,
    TTSModelSettings,
    VoicePipeline,
    VoicePipelineConfig,
)
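
# Assumed Space dependencies (a requirements.txt sketch, not pinned or verified
# against a specific build): gradio, numpy, mcp, openai-agents[voice].
# The voice pipeline also expects an OPENAI_API_KEY secret to be set on the Space.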
# ------ Custom MCP Cooking Tools Server ------
mcp = FastMCP("Culinary Tools Server")
@mcp.tool()
def get_recipe_by_ingredients(ingredients: list) -> dict:
    """Find recipes based on available ingredients"""
    print(f"[Culinary Server] Finding recipes with: {', '.join(ingredients)}")
    # In a real implementation, this would call a recipe API
    return {
        "recipes": [
            {"name": "Vegetable Stir Fry", "time": 20, "difficulty": "Easy"},
            {"name": "Pasta Primavera", "time": 30, "difficulty": "Medium"},
        ]
    }
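
# Hedged sketch of what a real implementation of the tool above might look like:
# querying a recipe-search HTTP API. The endpoint, response shape, and the
# RECIPE_API_URL / RECIPE_API_KEY settings are illustrative assumptions, not
# part of this Space (requests would also need to be added to requirements.txt).
def _search_recipes_via_api(ingredients: list) -> dict:
    import os
    import requests

    resp = requests.get(
        os.environ.get("RECIPE_API_URL", "https://example.com/api/recipes"),
        params={
            "ingredients": ",".join(ingredients),
            "apiKey": os.environ.get("RECIPE_API_KEY", ""),
        },
        timeout=10,
    )
    resp.raise_for_status()
    # Assumes the API returns a JSON list of objects with "title" and "readyInMinutes"
    return {
        "recipes": [
            {"name": r.get("title"), "time": r.get("readyInMinutes")}
            for r in resp.json()
        ]
    }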

@mcp.tool()
def get_recipe_image(recipe_name: str) -> str:
    """Generate an image of the finished recipe"""
    print(f"[Culinary Server] Generating image for: {recipe_name}")
    # This would call DALL-E or Stable Diffusion in production
    return "https://example.com/recipe-image.jpg"

@mcp.tool()
def convert_measurements(amount: float, from_unit: str, to_unit: str) -> dict:
    """Convert cooking measurements between units"""
    print(f"[Culinary Server] Converting {amount} {from_unit} to {to_unit}")
    # Simple conversion logic - a real implementation would handle more units
    conversions = {
        ("tbsp", "tsp"): lambda x: x * 3,
        ("cups", "ml"): lambda x: x * 240,
        ("oz", "g"): lambda x: x * 28.35,
    }
    conversion_key = (from_unit.lower(), to_unit.lower())
    if conversion_key in conversions:
        return {"result": conversions[conversion_key](amount), "unit": to_unit}
    return {"error": "Conversion not supported"}
# ------ Recipe Database (SQLite) ------
def init_recipe_db():
    # Use an on-disk database so the external mcp-server-sqlite process started
    # below can read the same data (a shared in-memory database is not visible
    # across processes).
    conn = sqlite3.connect("recipes.db")
    c = conn.cursor()
    c.execute('''CREATE TABLE IF NOT EXISTS recipes
                 (id INTEGER PRIMARY KEY, name TEXT, ingredients TEXT, instructions TEXT, prep_time INT)''')
    # Seed sample recipes only if the table is empty
    if c.execute("SELECT COUNT(*) FROM recipes").fetchone()[0] == 0:
        recipes = [
            ("Classic Pancakes", "['flour', 'eggs', 'milk', 'baking powder']",
             "1. Mix dry ingredients\n2. Add wet ingredients\n3. Cook on griddle", 15),
            ("Tomato Soup", "['tomatoes', 'onion', 'garlic', 'vegetable stock']",
             "1. Sauté onions\n2. Add tomatoes\n3. Simmer and blend", 30),
        ]
        c.executemany("INSERT INTO recipes (name, ingredients, instructions, prep_time) VALUES (?,?,?,?)", recipes)
        conn.commit()
    return conn
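
# Example of what the seeded table looks like (local check, not used by the app):
#   init_recipe_db().execute("SELECT name, prep_time FROM recipes").fetchall()
#   -> [('Classic Pancakes', 15), ('Tomato Soup', 30)]
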
# ------ Voice Assistant Setup ------
def create_culinary_agent(mcp_servers):
    """Create the culinary assistant agent"""
    culinary_agent = Agent(
        name="ChefAssistant",
        instructions="""
        You are a professional chef assistant. Help users with cooking tasks:
        1. Use get_recipe_by_ingredients when users have specific ingredients
        2. Query the recipe database (SQLite tools) for details of known recipes
        3. Use convert_measurements for unit conversions
        4. Use get_recipe_image when the user asks to see a dish
        5. Keep responses concise and practical for kitchen use
        6. Use a warm, encouraging tone suitable for cooking
        """,
        mcp_servers=mcp_servers,
        model="gpt-4.1-mini",
    )
    return culinary_agent
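
# Hedged sketch (defined but never called): exercising the agent in text-only
# mode is often the easiest way to test the MCP wiring before adding voice.
# Runner is part of the openai-agents SDK; the query string is just an example.
async def _text_smoke_test():
    from agents import Runner

    async with MCPServerSse(params={"url": "http://localhost:8000/sse"}) as srv:
        agent = create_culinary_agent([srv])
        result = await Runner.run(agent, "What can I make with eggs and flour?")
        print(result.final_output)
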
# ------ Gradio Interface ------
async def process_voice_command(audio, state):
    """Process a voice command through the agent + MCP toolchain"""
    if audio is None:
        return None, "No audio received.", None, state
    sr, audio_data = audio
    # Gradio's numpy microphone input is int16 PCM; scale to float32 in [-1, 1]
    audio_array = (audio_data / np.iinfo(audio_data.dtype).max).astype(np.float32)

    # Initialize the recipe database on first run and keep the connection alive
    if state is None:
        state = {"db_conn": init_recipe_db()}

    # Attach the MCP servers for this request
    async with MCPServerSse(
        name="Culinary Tools",
        params={"url": "http://localhost:8000/sse"},
        client_session_timeout_seconds=15,
    ) as culinary_server:
        async with MCPServerStdio(
            params={"command": "uvx", "args": ["mcp-server-sqlite", "--db-path", "recipes.db"]},
        ) as db_server:
            agent = create_culinary_agent([culinary_server, db_server])
            pipeline = VoicePipeline(
                workflow=SingleAgentVoiceWorkflow(agent),
                config=VoicePipelineConfig(
                    tts_settings=TTSModelSettings(
                        instructions="Warm, encouraging chef voice"
                    )
                ),
            )
            # Run speech-to-text -> agent -> text-to-speech
            audio_input = AudioInput(buffer=audio_array, frame_rate=sr)
            result = await pipeline.run(audio_input)
            # A full implementation would collect result.stream() audio events
            # into a playable waveform; for demo purposes, return a mock response.
            return (
                "https://example.com/response.wav",
                "I found 2 recipes for your ingredients: Vegetable Stir Fry (20 mins) and Pasta Primavera (30 mins).",
                "https://example.com/stir-fry.jpg",
                state,
            )
# ------ Hugging Face Space UI ------
with gr.Blocks(title="MCP Culinary Voice Assistant") as demo:
    state = gr.State(value=None)
    with gr.Row():
        gr.Markdown("# 🧑‍🍳 MCP-Powered Culinary Voice Assistant")
    with gr.Row():
        # Gradio 4+ uses `sources` (a list) rather than `source`
        audio_input = gr.Audio(sources=["microphone"], type="numpy", label="Speak to Chef Assistant")
        audio_output = gr.Audio(label="Assistant Response", interactive=False)
    with gr.Row():
        text_output = gr.Textbox(label="Transcription", interactive=False)
        image_output = gr.Image(label="Recipe Image", interactive=False)
    with gr.Row():
        submit_btn = gr.Button("Process Command", variant="primary")
    submit_btn.click(
        fn=process_voice_command,
        inputs=[audio_input, state],
        outputs=[audio_output, text_output, image_output, state],
    )
    gr.Examples(
        examples=[
            ["What can I make with eggs and flour?"],
            ["Show me how tomato soup looks"],
            ["Convert 2 cups to milliliters"],
        ],
        inputs=[text_output],
        label="Example Queries",
    )
if __name__ == "__main__":
    # Start the FastMCP SSE server in a background thread; by default it listens
    # on port 8000, matching the URL used by MCPServerSse above.
    import threading
    server_thread = threading.Thread(target=mcp.run, kwargs={"transport": "sse"})
    server_thread.daemon = True
    server_thread.start()
    # Launch the Gradio interface
    demo.launch(server_name="0.0.0.0", server_port=7860)