# MCP-Powered Culinary Voice Assistant
# Hugging Face Space Implementation

import gradio as gr
import numpy as np
from mcp.server.fastmcp import FastMCP
from agents import Agent, trace
from agents.mcp import MCPServerSse, MCPServerStdio
from agents.voice import VoicePipeline, TTSModelSettings, AudioInput
import sqlite3
import json
import requests
from PIL import Image
import io

# ------ Custom MCP Cooking Tools Server ------
# Shared FastMCP server instance: the @mcp.tool() functions in this file
# register on it, and the __main__ block runs it with the "sse" transport.
mcp = FastMCP("Culinary Tools Server")

@mcp.tool()
def get_recipe_by_ingredients(ingredients: list) -> dict:
    """Find recipes based on available ingredients"""
    # The docstring above is kept verbatim: it sits under @mcp.tool(), so it
    # may be what MCP clients see as the tool description.
    joined = ", ".join(ingredients)
    print(f"[Culinary Server] Finding recipes with: {joined}")

    # Placeholder data: the same two recipes are returned whatever
    # `ingredients` contains (no recipe API is called yet).
    stub_recipes = [
        {"name": "Vegetable Stir Fry", "time": 20, "difficulty": "Easy"},
        {"name": "Pasta Primavera", "time": 30, "difficulty": "Medium"},
    ]
    return {"recipes": stub_recipes}

@mcp.tool()
def get_recipe_image(recipe_name: str) -> str:
    """Generate an image of the finished recipe"""
    # Stub: nothing is generated yet; every recipe name maps to the same
    # fixed placeholder URL.
    placeholder_url = "https://example.com/recipe-image.jpg"
    print("[Culinary Server] Generating image for: {}".format(recipe_name))
    return placeholder_url

@mcp.tool()
def convert_measurements(amount: float, from_unit: str, to_unit: str) -> dict:
    """Convert cooking measurements between units"""
    print(f"[Culinary Server] Converting {amount} {from_unit} to {to_unit}")

    # (source unit, target unit) -> multiplier. Only these three directions
    # are known; the reverse directions are not handled.
    multipliers = {
        ("tbsp", "tsp"): 3,
        ("cups", "ml"): 240,
        ("oz", "g"): 28.35,
    }

    # Unit names are matched case-insensitively, but the reply echoes
    # `to_unit` exactly as the caller passed it.
    factor = multipliers.get((from_unit.lower(), to_unit.lower()))
    if factor is None:
        return {"error": "Conversion not supported"}
    return {"result": amount * factor, "unit": to_unit}

# ------ Recipe Database (SQLite) ------
def init_recipe_db():
    """Open the shared in-memory recipe database and make sure it is seeded.

    Creates the ``recipes`` table if it does not exist and inserts the two
    sample recipes only when the table is empty, so calling this function
    again while the database is still alive does not duplicate rows.

    Returns:
        sqlite3.Connection: an open connection to the database.

    NOTE: per SQLite's in-memory database semantics, the data is discarded
    once the last connection to it is closed, so callers should hold on to
    the returned connection for as long as they need the data.
    """
    conn = sqlite3.connect('file:recipes.db?mode=memory&cache=shared', uri=True)

    # Sample recipes
    sample_recipes = [
        ("Classic Pancakes", "['flour', 'eggs', 'milk', 'baking powder']",
         "1. Mix dry ingredients\n2. Add wet ingredients\n3. Cook on griddle", 15),
        ("Tomato Soup", "['tomatoes', 'onion', 'garlic', 'vegetable stock']",
         "1. Sauté onions\n2. Add tomatoes\n3. Simmer and blend", 30),
    ]

    try:
        # `with conn` commits on success and rolls back if anything raises.
        with conn:
            conn.execute('''CREATE TABLE IF NOT EXISTS recipes
                 (id INTEGER PRIMARY KEY, name TEXT, ingredients TEXT, instructions TEXT, prep_time INT)''')

            # Seed only an empty table; otherwise every call would append
            # another copy of the sample rows.
            row_count = conn.execute("SELECT COUNT(*) FROM recipes").fetchone()[0]
            if row_count == 0:
                conn.executemany(
                    "INSERT INTO recipes (name, ingredients, instructions, prep_time) VALUES (?,?,?,?)",
                    sample_recipes,
                )
    except Exception:
        # Do not leak the connection when setup fails.
        conn.close()
        raise
    return conn

# ------ Voice Assistant Setup ------
def create_culinary_agent(mcp_servers):
    """Build the "ChefAssistant" agent wired to the given MCP servers.

    Args:
        mcp_servers: Passed straight through as the agent's ``mcp_servers``
            argument.

    Returns:
        The configured ``Agent`` instance (model ``gpt-4.1-mini``).
    """
    # NOTE(review): rule 2 of the prompt names a `get_recipe_details` tool that
    # is not defined in this file; presumably it is expected from another MCP
    # server. Confirm it exists.
    chef_prompt = """
        You are a professional chef assistant. Help users with cooking tasks:
        1. Use get_recipe_by_ingredients when users have specific ingredients
        2. Use get_recipe_details for known recipes
        3. Use convert_measurements for unit conversions
        4. Use get_recipe_image when the user asks to see a dish
        5. Keep responses concise and practical for kitchen use
        6. Use a warm, encouraging tone suitable for cooking
        """
    return Agent(
        name="ChefAssistant",
        instructions=chef_prompt,
        mcp_servers=mcp_servers,
        model="gpt-4.1-mini",
    )

# ------ Gradio Interface ------
def process_voice_command(audio, state):
    """Process voice command through the agent system.

    Args:
        audio: ``(sample_rate, samples)`` tuple from the ``type="numpy"``
            audio input, or ``None`` when nothing was recorded.
        state: Per-session dict built on the first call (``None`` before
            that). Holds the MCP servers, the agent and the voice pipeline.

    Returns:
        A 4-tuple ``(audio_response, text, image, state)`` matching the four
        outputs wired to the submit button. The first three values are
        currently hard-coded demo data; the pipeline result is not used yet.
        When no audio was supplied, the audio and image slots are ``None``
        and the text slot asks the user to record again.
    """
    # Nothing recorded (or an empty clip): answer politely instead of crashing
    # on the tuple unpacking / normalisation below.
    no_audio_reply = (
        None,
        "I didn't catch any audio. Please record a command and try again.",
        None,
        state,
    )
    if audio is None:
        return no_audio_reply

    sr, audio_data = audio
    if audio_data.size == 0:
        return no_audio_reply

    # Integer PCM is scaled by the dtype's maximum value. np.iinfo() rejects
    # non-integer dtypes, so float samples are only cast to float32.
    if np.issubdtype(audio_data.dtype, np.integer):
        audio_array = (audio_data / np.iinfo(audio_data.dtype).max).astype(np.float32)
    else:
        audio_array = audio_data.astype(np.float32)

    # Initialize on first run
    if state is None:
        # VoicePipelineConfig is used below but is missing from the file-level
        # `from agents.voice import ...` line; import it here so the first
        # run does not fail with NameError.
        from agents.voice import VoicePipelineConfig

        # NOTE(review): the connection returned by init_recipe_db() is dropped
        # here; with an in-memory database that presumably discards the data
        # right away. Confirm whether it should be kept in `state`.
        init_recipe_db()
        state = {
            "mcp_servers": [],
            "agent": None,
            "voice_pipeline": VoicePipeline(
                workflow=None,
                config=VoicePipelineConfig(
                    tts_settings=TTSModelSettings(
                        instructions="Warm, encouraging chef voice"
                    )
                )
            )
        }

        # Start MCP servers
        # NOTE(review): both `with` blocks are exited before the agent is ever
        # used, so the servers kept in `state` have already had their context
        # managers closed by the time of the run() call below. Also verify
        # against the Agents SDK docs whether these classes support a plain
        # `with` (rather than `async with`).
        with MCPServerSse(
            name="Culinary Tools",
            params={"url": "http://localhost:8000/sse"},
            client_session_timeout_seconds=15,
        ) as culinary_server:
            with MCPServerStdio(
                params={"command": "uvx", "args": ["mcp-server-sqlite", "--db-path", "file:recipes.db?mode=memory&cache=shared"]},
            ) as db_server:
                state["mcp_servers"] = [culinary_server, db_server]
                state["agent"] = create_culinary_agent(state["mcp_servers"])

    # Process audio through agent
    # NOTE(review): confirm the AudioInput keyword for the sample rate and the
    # VoicePipeline.run() signature (sync vs. coroutine, agent argument)
    # against the Agents SDK docs. The result is ignored for now.
    audio_input = AudioInput(buffer=audio_array, sample_rate=sr)
    state["voice_pipeline"].run(state["agent"], audio_input)

    # For demo purposes, return mock response
    return (
        "https://example.com/response.wav",
        "I found 3 recipes for your ingredients! Vegetable Stir Fry (20 mins) and Pasta Primavera (30 mins).",
        "https://example.com/stir-fry.jpg",
        state
    )

# ------ Hugging Face Space UI ------
# Declarative Gradio layout. Components appear in the order they are created,
# and `demo` is the object launched from the __main__ block.
with gr.Blocks(title="MCP Culinary Voice Assistant") as demo:
    # Per-session state; starts as None so process_voice_command() builds its
    # state dict on the first call.
    state = gr.State(value=None)
    
    with gr.Row():
        gr.Markdown("# 🧑‍🍳 MCP-Powered Culinary Voice Assistant")
    
    with gr.Row():
        # NOTE(review): newer Gradio releases presumably expect
        # `sources=["microphone"]` instead of `source=`; verify against the
        # Gradio version this Space pins.
        audio_input = gr.Audio(source="microphone", type="numpy", label="Speak to Chef Assistant")
        audio_output = gr.Audio(label="Assistant Response", interactive=False)
    
    with gr.Row():
        # Receives the second value returned by process_voice_command().
        text_output = gr.Textbox(label="Transcription", interactive=False)
        image_output = gr.Image(label="Recipe Image", interactive=False)
    
    with gr.Row():
        submit_btn = gr.Button("Process Command", variant="primary")
    
    # The handler takes the recording plus the session state and returns one
    # value per output component, in this order.
    submit_btn.click(
        fn=process_voice_command,
        inputs=[audio_input, state],
        outputs=[audio_output, text_output, image_output, state]
    )

    # NOTE(review): each example row holds three values while only one
    # component is listed in `inputs`; confirm how Gradio treats the extra
    # columns. The examples also target `text_output`, which is not an input
    # of the click handler above.
    gr.Examples(
        examples=[
            ["What can I make with eggs and flour?", "", ""],
            ["Show me how tomato soup looks", "", ""],
            ["Convert 2 cups to milliliters", "", ""]
        ],
        inputs=[text_output],
        label="Example Queries"
    )

if __name__ == "__main__":
    import threading

    # Run the MCP tools server (SSE transport) on a daemon thread alongside
    # the Gradio app.
    server_thread = threading.Thread(
        target=mcp.run,
        kwargs={"transport": "sse"},
        daemon=True,
    )
    server_thread.start()

    # Launch the Gradio interface on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)