File size: 9,337 Bytes
2677642
 
 
 
7cd9628
1155ca4
2677642
7cd9628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2677642
7cd9628
 
 
 
 
2677642
7cd9628
1155ca4
2677642
 
 
 
 
 
 
7cd9628
2677642
1155ca4
 
 
 
 
 
2677642
7cd9628
2677642
1155ca4
2677642
 
 
 
 
 
 
1155ca4
 
2677642
 
7cd9628
2677642
7cd9628
2677642
7cd9628
2677642
 
 
7cd9628
2677642
7cd9628
1155ca4
 
 
2677642
 
 
 
 
 
7cd9628
 
 
 
1155ca4
 
 
 
 
7cd9628
 
 
1155ca4
 
 
 
 
7cd9628
 
 
 
1155ca4
7cd9628
1155ca4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cd9628
1155ca4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cd9628
 
 
 
1155ca4
 
 
 
 
7cd9628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2677642
 
7cd9628
2677642
7cd9628
 
2677642
7cd9628
 
2677642
1155ca4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7cd9628
1155ca4
2677642
7cd9628
1155ca4
7cd9628
1155ca4
 
2677642
 
 
1155ca4
7cd9628
2677642
 
1155ca4
 
 
 
 
2677642
 
 
 
 
 
 
7cd9628
 
2677642
 
 
 
7cd9628
 
1155ca4
 
2677642
 
 
 
 
 
7cd9628
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
import gradio as gr
import numpy as np
import sqlite3
import json
import time
from PIL import Image, ImageDraw

# ------ Mock MCP Server Implementation ------
class MockMCPServer:
    """In-process stand-in for an MCP server: a registry of named callables."""

    def __init__(self):
        # Maps tool name -> {"function": callable, "description": str}.
        self.tools = {}

    def register_tool(self, name, func, description):
        """Store ``func`` under ``name``; an existing entry with that name is replaced."""
        self.tools[name] = dict(function=func, description=description)

    def call_tool(self, tool_name, params):
        """Run the tool registered as ``tool_name``.

        ``params`` is unpacked into keyword arguments for the tool. The tool's
        return value is passed through unchanged; an unknown name yields an
        ``{"error": ...}`` dict instead of raising.
        """
        try:
            entry = self.tools[tool_name]
        except KeyError:
            return {"error": f"Tool {tool_name} not found"}
        # Called outside the try so a KeyError raised by the tool itself propagates.
        return entry["function"](**params)

# ------ Create Mock MCP Server ------
# Module-level registry shared by the whole app: the tool functions below are
# registered on it further down, and process_query() dispatches through it.
mcp_server = MockMCPServer()

# ------ Tool Implementations ------
def get_recipe_by_ingredients(ingredients):
    """Return recipe suggestions for the given ingredients.

    Mock implementation: the ingredients are only logged, and the same two
    canned recipes are returned for every call.

    Returns:
        ``{"recipes": [...]}`` where each recipe dict has the keys
        ``name``, ``time`` and ``difficulty``.
    """
    print(f"Searching recipes with ingredients: {ingredients}")
    # Canned results; a real implementation would query a recipe API here.
    canned = (
        ("Vegetable Stir Fry", 20, "Easy"),
        ("Pasta Primavera", 30, "Medium"),
    )
    suggestions = [
        {"name": title, "time": minutes, "difficulty": level}
        for title, minutes, level in canned
    ]
    return {"recipes": suggestions}

def get_recipe_image(recipe_name):
    """Build a placeholder picture for ``recipe_name``.

    Mock implementation: returns a 300x200 RGB PIL image with a flat
    background and the caption "Image of: <recipe_name>" drawn near the
    top-left corner.
    """
    print(f"Generating image for: {recipe_name}")
    canvas_size = (300, 200)
    background = (73, 109, 137)
    caption_color = (255, 255, 0)
    picture = Image.new('RGB', canvas_size, color=background)
    ImageDraw.Draw(picture).text((10, 10), f"Image of: {recipe_name}", fill=caption_color)
    return picture

# Spelled-out and singular/plural unit names mapped to the abbreviation used
# as the key in _CONVERSION_FACTORS.
_UNIT_ALIASES = {
    "tablespoon": "tbsp",
    "tablespoons": "tbsp",
    "tbsps": "tbsp",
    "teaspoon": "tsp",
    "teaspoons": "tsp",
    "tsps": "tsp",
    "cup": "cups",
    "milliliter": "ml",
    "milliliters": "ml",
    "millilitre": "ml",
    "millilitres": "ml",
    "ounce": "oz",
    "ounces": "oz",
    "gram": "g",
    "grams": "g",
}

# (from, to) -> multiplier. The reverse direction is derived by dividing.
_CONVERSION_FACTORS = {
    ("tbsp", "tsp"): 3,
    ("cups", "ml"): 240,
    ("oz", "g"): 28.35,
}


def convert_measurements(amount, from_unit, to_unit):
    """Convert a cooking measurement between units.

    Supported pairs are tbsp<->tsp, cups<->ml and oz<->g. Unit names are
    matched case-insensitively and common spelled-out forms ("tablespoons",
    "milliliters", "ounce", ...) are accepted.

    Args:
        amount: Numeric quantity to convert.
        from_unit: Unit of ``amount``.
        to_unit: Unit to convert into.

    Returns:
        ``{"result": <value rounded to 2 places>, "unit": to_unit}`` on
        success (``unit`` echoes ``to_unit`` exactly as it was passed in), or
        ``{"error": "Conversion not supported"}`` for an unknown pair.
    """
    print(f"Converting {amount} {from_unit} to {to_unit}")

    def canonical(unit):
        # Normalise case/whitespace, then fold aliases onto the table's keys.
        key = unit.strip().lower()
        return _UNIT_ALIASES.get(key, key)

    source = canonical(from_unit)
    target = canonical(to_unit)

    if (source, target) in _CONVERSION_FACTORS:
        result = amount * _CONVERSION_FACTORS[(source, target)]
    elif (target, source) in _CONVERSION_FACTORS:
        # Reverse direction of a known pair: divide instead of multiply.
        result = amount / _CONVERSION_FACTORS[(target, source)]
    else:
        return {"error": "Conversion not supported"}
    return {"result": round(result, 2), "unit": to_unit}

# ------ Recipe Database ------
def init_recipe_db():
    """Create an in-memory SQLite database seeded with three sample recipes.

    The ``recipes`` table has the columns id, name, ingredients (a
    JSON-encoded list of strings), instructions and prep_time.

    Returns:
        The open ``sqlite3`` connection, with the seed rows committed.
    """
    db = sqlite3.connect(':memory:')
    db.execute('''CREATE TABLE recipes
                 (id INTEGER PRIMARY KEY, name TEXT, ingredients TEXT, instructions TEXT, prep_time INT)''')

    # (name, ingredient list, instructions, prep time)
    seed = (
        ("Classic Pancakes",
         ["flour", "eggs", "milk", "baking powder"],
         "1. Mix dry ingredients\n2. Add wet ingredients\n3. Cook on griddle",
         15),
        ("Tomato Soup",
         ["tomatoes", "onion", "garlic", "vegetable stock"],
         "1. Sauté onions\n2. Add tomatoes\n3. Simmer and blend",
         30),
        ("Chocolate Cake",
         ["flour", "sugar", "cocoa", "eggs", "milk"],
         "1. Mix dry ingredients\n2. Add wet ingredients\n3. Bake at 350°F",
         45),
    )
    # Ingredient lists are stored as JSON text in a single column.
    rows = [
        (title, json.dumps(items), steps, minutes)
        for title, items, steps, minutes in seed
    ]

    db.executemany("INSERT INTO recipes (name, ingredients, instructions, prep_time) VALUES (?,?,?,?)", rows)
    db.commit()
    return db

# ------ Voice Processing Functions ------
def text_to_speech(text):
    """Mock TTS: log ``text`` and return silence in its place.

    Returns:
        ``(sample_rate, samples)`` where ``sample_rate`` is 44100 and
        ``samples`` is a float32 array of zeros two seconds long.
    """
    print(f"[TTS]: {text}")
    sample_rate = 44100
    seconds = 2
    # Placeholder waveform; swap in a real TTS engine here.
    silence = np.zeros(int(seconds * sample_rate), dtype=np.float32)
    return sample_rate, silence

def speech_to_text(audio):
    """Mock STT: log the incoming audio's metadata and return a canned query.

    Args:
        audio: ``(sample_rate, samples)`` pair; ``samples`` must expose a
            ``.shape`` attribute (it is only used for logging).

    Returns:
        Always the fixed demo sentence, whatever the audio contains.
    """
    rate, waveform = audio
    print(f"Received audio with sample rate {rate} and shape {waveform.shape}")
    # No real recognition happens yet; every recording maps to this query.
    canned_transcript = "What can I make with eggs and flour?"
    return canned_transcript

# ------ Agent Logic ------
def _parse_conversion_request(query):
    """Extract ``(amount, from_unit, to_unit)`` from "convert <n> <unit> to <unit>".

    Keywords are matched case-insensitively and surrounding punctuation is
    ignored; the units are returned with their original casing.

    Raises:
        ValueError: "convert"/"to" is missing or the amount is not a number.
        IndexError: the query ends before the amount or a unit.
    """
    tokens = [tok for tok in (raw.strip("?.!,;:") for raw in query.split()) if tok]
    lowered = [tok.lower() for tok in tokens]
    start = lowered.index("convert")
    amount = float(tokens[start + 1])
    from_unit = tokens[start + 2]
    # Search for "to" only after the source unit, so an earlier "to"
    # ("I want to convert ...") is not mistaken for the separator.
    to_unit = tokens[lowered.index("to", start + 3) + 1]
    return amount, from_unit, to_unit


def process_query(query, db_conn):
    """Route a text query to the matching tool and return a tagged result.

    Intent is detected by case-insensitive keyword matching, checked in this
    order: recipe search ("recipe"/"make"/"cook"), image ("image"/"show"/
    "look"), unit conversion ("convert"). Anything else is treated as a
    recipe-name search against the database.

    Args:
        query: The user's request as text.
        db_conn: Open sqlite3 connection holding a ``recipes`` table; only
            used by the fallback name search.

    Returns:
        ``{"type": ..., "data": ...}`` where ``type`` is one of "recipes",
        "image", "conversion" or "db_recipes" and ``data`` is the tool's
        result (or the list of matching rows for "db_recipes").
    """
    print(f"Processing query: {query}")
    text = query.lower()

    # Simple intent recognition
    if "recipe" in text or "make" in text or "cook" in text:
        # Extract ingredients - very simple, just use some keywords
        known_ingredients = ("eggs", "flour", "milk", "tomatoes", "onion", "garlic")
        ingredients = [word for word in known_ingredients if word in text]
        if not ingredients:
            ingredients = ["eggs", "flour"]  # default
        return {
            "type": "recipes",
            "data": mcp_server.call_tool("get_recipe_by_ingredients", {"ingredients": ingredients})
        }

    if "image" in text or "show" in text or "look" in text:
        # First known recipe mentioned in the query, else the default.
        known_recipes = ("pancakes", "stir fry", "tomato soup", "chocolate cake")
        recipe_name = next(
            (recipe for recipe in known_recipes if recipe in text),
            "Classic Pancakes",
        )
        return {
            "type": "image",
            "data": mcp_server.call_tool("get_recipe_image", {"recipe_name": recipe_name})
        }

    if "convert" in text:
        try:
            amount, from_unit, to_unit = _parse_conversion_request(query)
        except (ValueError, IndexError):
            # Best-effort fallback when the query does not follow
            # "convert <number> <unit> to <unit>".
            amount, from_unit, to_unit = 2, "cups", "ml"
        return {
            "type": "conversion",
            "data": mcp_server.call_tool("convert_measurements", {"amount": amount, "from_unit": from_unit, "to_unit": to_unit})
        }

    # Fallback to database search; the pattern is bound as a parameter, never
    # interpolated into the SQL text.
    cursor = db_conn.cursor()
    cursor.execute("SELECT * FROM recipes WHERE name LIKE ?", (f"%{query}%",))
    return {
        "type": "db_recipes",
        "data": cursor.fetchall()
    }

# ------ Register Tools with MCP Server ------
# Each tool is exposed under the same name as the function implementing it;
# process_query() looks the tools up by these names.
mcp_server.register_tool(
    name="get_recipe_by_ingredients",
    func=get_recipe_by_ingredients,
    description="Find recipes based on available ingredients",
)
mcp_server.register_tool(
    name="get_recipe_image",
    func=get_recipe_image,
    description="Generate an image of the finished recipe",
)
mcp_server.register_tool(
    name="convert_measurements",
    func=convert_measurements,
    description="Convert cooking measurements between units",
)

# ------ Initialize System ------
# One in-memory database connection, created at import time and shared by
# every request handled by process_voice_command().
db_conn = init_recipe_db()

# ------ Gradio Interface ------
def _render_agent_result(result):
    """Turn a process_query() result into ``(response_text, image)``.

    ``image`` is the tool's image for "image" results and ``None`` otherwise.
    """
    kind = result["type"]

    if kind == "recipes":
        recipes = result["data"]["recipes"]
        lines = [f"Found {len(recipes)} recipes:"]
        lines.extend(
            f"- {recipe['name']} ({recipe['time']} mins, {recipe['difficulty']})"
            for recipe in recipes
        )
        return "\n".join(lines) + "\n", None

    if kind == "image":
        # The payload is the PIL image returned by the image tool.
        return "Here is an image of the recipe!", result["data"]

    if kind == "conversion":
        conv = result["data"]
        if "error" in conv:
            return f"Error: {conv['error']}", None
        return f"{conv['result']} {conv['unit']}", None

    if kind == "db_recipes":
        rows = result["data"]
        if not rows:
            return "No recipes found.", None
        # Row layout is (id, name, ingredients, instructions, prep_time).
        lines = [f"Found {len(rows)} recipes in database:"]
        lines.extend(f"- {row[1]} ({row[4]} mins)" for row in rows)
        return "\n".join(lines) + "\n", None

    return "I'm not sure how to help with that.", None


def process_voice_command(audio):
    """Run one voice command through speech-to-text, the agent, and TTS.

    Args:
        audio: ``(sample_rate, samples)`` from the microphone component, or
            ``None`` when the handler fires without a recording.

    Returns:
        ``((sample_rate, samples), response_text, image)`` where the first
        item is the spoken response, ``response_text`` is the same response
        as text, and ``image`` is a PIL image or ``None``.
    """
    if audio is None:
        # Nothing was recorded: answer with a prompt instead of crashing on
        # tuple-unpacking None inside speech_to_text().
        response_text = "I didn't receive any audio. Please record a command and try again."
        image = None
    else:
        # Convert audio to text, then let the agent logic pick a tool.
        query = speech_to_text(audio)
        result = process_query(query, db_conn)
        response_text, image = _render_agent_result(result)

    # Convert response to audio
    sr, audio_data = text_to_speech(response_text)

    # Return results: audio output, text, and image
    return (sr, audio_data), response_text, image

# ------ Hugging Face Space UI ------
# Layout: a header, then one row with the microphone input + submit button on
# the left and the spoken response on the right, then a second row with the
# response text and the optional recipe image.
with gr.Blocks(title="MCP Culinary Voice Assistant") as demo:
    gr.Markdown("# 🧑‍🍳 MCP-Powered Culinary Voice Assistant")
    gr.Markdown("Speak to your cooking assistant about recipes, conversions, and more!")
    
    with gr.Row():
        with gr.Column():
            # type="numpy" makes the handler receive (sample_rate, ndarray).
            # NOTE(review): `source=` appears to be the Gradio 3.x keyword; newer
            # releases expect `sources=["microphone"]` — confirm against the
            # Gradio version this Space pins.
            audio_input = gr.Audio(source="microphone", type="numpy", label="Speak to Chef Assistant")
            submit_btn = gr.Button("Process Command", variant="primary")
        with gr.Column():
            audio_output = gr.Audio(label="Assistant Response", interactive=False)
    
    with gr.Row():
        # NOTE(review): labelled "Transcription", but process_voice_command()
        # returns the assistant's response text in this slot, not the
        # transcribed query.
        text_output = gr.Textbox(label="Transcription", interactive=False)
        image_output = gr.Image(label="Recipe Image", interactive=False)
    
    # The three outputs line up with the 3-tuple returned by
    # process_voice_command: (audio, text, image).
    submit_btn.click(
        fn=process_voice_command,
        inputs=[audio_input],
        outputs=[audio_output, text_output, image_output]
    )

    # NOTE(review): these examples are wired to text_output, which is not an
    # input of process_voice_command, and no fn is given here — so selecting
    # one presumably only fills the textbox without running the agent. Confirm
    # whether a text-query input was intended.
    gr.Examples(
        examples=[
            ["What can I make with eggs and flour?"],
            ["Show me how tomato soup looks"],
            ["Convert 2 cups to milliliters"],
            ["Find chocolate cake recipes"]
        ],
        inputs=[text_output],
        label="Example Queries"
    )

# Start the Gradio server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()