File size: 7,099 Bytes
7cd9628
2677642
 
 
 
 
 
 
 
 
7cd9628
2677642
7cd9628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2677642
7cd9628
 
 
 
 
2677642
7cd9628
2677642
 
 
 
 
 
 
7cd9628
2677642
7cd9628
 
 
 
 
2677642
7cd9628
2677642
 
 
 
 
 
 
 
 
 
 
7cd9628
2677642
7cd9628
2677642
7cd9628
2677642
 
 
7cd9628
2677642
7cd9628
2677642
 
 
 
 
 
 
7cd9628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2677642
 
7cd9628
2677642
7cd9628
 
2677642
7cd9628
 
2677642
7cd9628
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2677642
7cd9628
 
 
 
2677642
7cd9628
 
 
2677642
 
 
 
7cd9628
 
2677642
 
 
 
 
 
 
 
 
 
 
 
 
 
7cd9628
 
2677642
 
 
 
7cd9628
 
 
2677642
 
 
 
 
 
7cd9628
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
# MCP-Powered Voice Assistant with Open-Source Tools
# Hugging Face Space Implementation

import gradio as gr
import numpy as np
import sqlite3
import json
import requests
from PIL import Image
import io
import time

# ------ Mock MCP Server Implementation ------
class MockMCPServer:
    """In-memory stand-in for an MCP server: a registry of named tools."""

    def __init__(self):
        # Maps tool name -> {"function": callable, "description": str}
        self.tools = {}

    def register_tool(self, name, func, description):
        """Register *func* under *name* with a human-readable description."""
        self.tools[name] = {"function": func, "description": description}

    def call_tool(self, tool_name, params):
        """Invoke a registered tool with keyword params; error dict if unknown."""
        entry = self.tools.get(tool_name)
        if entry is None:
            return {"error": f"Tool {tool_name} not found"}
        return entry["function"](**params)

# ------ Create Mock MCP Server ------
# Module-level singleton tool registry; process_query() dispatches through it
# and the register_tool() calls further down populate it at import time.
mcp_server = MockMCPServer()

# ------ Tool Implementations ------
def get_recipe_by_ingredients(ingredients):
    """Find recipes based on available ingredients.

    Mock: returns a fixed recipe list no matter what *ingredients* contains.
    """
    # In a real implementation, this would call an API
    stir_fry = {"name": "Vegetable Stir Fry", "time": 20, "difficulty": "Easy"}
    primavera = {"name": "Pasta Primavera", "time": 30, "difficulty": "Medium"}
    return {"recipes": [stir_fry, primavera]}

def get_recipe_image(recipe_name):
    """Generate an image of the finished recipe (mocked).

    In production this would call an image model such as Stable Diffusion;
    here it returns a fixed placeholder URL plus alt text for *recipe_name*.
    """
    response = {"image_url": "https://example.com/recipe-image.jpg"}
    response["alt_text"] = f"Image of {recipe_name}"
    return response

def convert_measurements(amount, from_unit, to_unit):
    """Convert cooking measurements between units.

    Args:
        amount: Numeric quantity expressed in ``from_unit``.
        from_unit: Source unit name, case-insensitive (e.g. "cups").
        to_unit: Target unit name, case-insensitive (e.g. "ml").

    Returns:
        ``{"result": <converted amount>, "unit": to_unit}`` on success, or
        ``{"error": "Conversion not supported"}`` for unknown unit pairs.
    """
    # Multiplicative factors for the supported forward conversions.
    factors = {
        ("tbsp", "tsp"): 3,
        ("cups", "ml"): 240,
        ("oz", "g"): 28.35,
    }
    key = (from_unit.lower(), to_unit.lower())
    if key in factors:
        return {"result": amount * factors[key], "unit": to_unit}
    # Generalization: also support the inverse of each known conversion
    # (e.g. ml -> cups, tsp -> tbsp) by dividing by the forward factor.
    inverse_key = (key[1], key[0])
    if inverse_key in factors:
        return {"result": amount / factors[inverse_key], "unit": to_unit}
    return {"error": "Conversion not supported"}

# ------ Recipe Database ------
def init_recipe_db():
    """Create an in-memory SQLite DB seeded with sample recipes.

    Returns the open connection; the caller owns its lifetime.
    """
    conn = sqlite3.connect(':memory:')
    cursor = conn.cursor()
    cursor.execute('''CREATE TABLE recipes
                 (id INTEGER PRIMARY KEY, name TEXT, ingredients TEXT, instructions TEXT, prep_time INT)''')

    # Ingredient lists are stored as JSON-encoded text columns.
    seed_rows = [
        (
            "Classic Pancakes",
            json.dumps(["flour", "eggs", "milk", "baking powder"]),
            "1. Mix dry ingredients\n2. Add wet ingredients\n3. Cook on griddle",
            15,
        ),
        (
            "Tomato Soup",
            json.dumps(["tomatoes", "onion", "garlic", "vegetable stock"]),
            "1. Sauté onions\n2. Add tomatoes\n3. Simmer and blend",
            30,
        ),
    ]
    cursor.executemany(
        "INSERT INTO recipes (name, ingredients, instructions, prep_time) VALUES (?,?,?,?)",
        seed_rows,
    )
    conn.commit()
    return conn

# ------ Voice Processing Functions ------
def text_to_speech(text):
    """Mock TTS: print the text and return one second of 16 kHz silence."""
    print(f"[TTS]: {text}")
    # Dummy waveform: 16000 zero-valued float32 samples at 16 kHz.
    silence = np.zeros(16000, dtype=np.float32)
    return silence, 16000

def speech_to_text(audio):
    """Mock STT: ignore *audio* and return a canned transcription."""
    canned_transcription = "Show me pancake recipes"
    return canned_transcription

# ------ Agent Logic ------
def process_query(query, db_conn):
    """Route a user query to an MCP tool or fall back to a DB name search.

    Intent detection is keyword-based and argument extraction is hard-coded
    placeholder values in this mock. Branch order matters: "recipe"/"make"
    wins over "image"/"show" when both keywords appear.
    """
    lowered = query.lower()
    if "recipe" in lowered or "make" in lowered:
        # Simplified ingredient extraction
        return mcp_server.call_tool(
            "get_recipe_by_ingredients",
            {"ingredients": ["flour", "eggs"]},
        )
    if "image" in lowered or "show" in lowered:
        # Simplified recipe-name extraction
        return mcp_server.call_tool(
            "get_recipe_image",
            {"recipe_name": "Classic Pancakes"},
        )
    if "convert" in lowered:
        # Simplified amount/unit extraction
        return mcp_server.call_tool(
            "convert_measurements",
            {"amount": 2, "from_unit": "cups", "to_unit": "ml"},
        )
    # Fallback: substring match against recipe names in the database.
    cursor = db_conn.cursor()
    cursor.execute("SELECT * FROM recipes WHERE name LIKE ?", (f"%{query}%",))
    return cursor.fetchall()

# ------ Register Tools with MCP Server ------
# Populate the mcp_server registry at import time; process_query() dispatches
# to these tools by the string names used here.
mcp_server.register_tool(
    "get_recipe_by_ingredients",
    get_recipe_by_ingredients,
    "Find recipes based on available ingredients"
)
mcp_server.register_tool(
    "get_recipe_image",
    get_recipe_image,
    "Generate an image of the finished recipe"
)
mcp_server.register_tool(
    "convert_measurements",
    convert_measurements,
    "Convert cooking measurements between units"
)

# ------ Initialize System ------
# Module-level in-memory SQLite connection shared by all requests in this
# process; lives for the lifetime of the app.
db_conn = init_recipe_db()

# ------ Gradio Interface ------
def process_voice_command(audio):
    """Process a voice command end-to-end: STT -> agent -> TTS.

    Args:
        audio: Raw audio from the Gradio microphone component (forwarded to
            speech_to_text, which currently ignores it).

    Returns:
        A 3-tuple of ((sample_rate, waveform), response_text, image_or_None)
        matching the outputs [audio_output, text_output, image_output].
    """
    # Convert audio to text (mocked)
    query = speech_to_text(audio)

    # Process query using agent logic
    result = process_query(query, db_conn)

    # Bug fix: `image` was previously bound only in some branches and read
    # back via a fragile `'image' in locals()` check; default it up front.
    image = None

    # Generate response text
    if isinstance(result, list) and result:
        # DB fallback rows: (id, name, ingredients, instructions, prep_time)
        response_text = f"Found {len(result)} recipes:\n"
        for item in result:
            response_text += f"- {item[1]} ({item[4]} mins)\n"
    elif "recipes" in result:
        response_text = f"Found {len(result['recipes'])} recipes:\n"
        for recipe in result["recipes"]:
            response_text += f"- {recipe['name']} ({recipe['time']} mins)\n"
    elif "image_url" in result:
        response_text = f"Here's an image of {result.get('alt_text', 'the recipe')}"
        # Placeholder image; a real build would fetch result["image_url"].
        image = Image.new('RGB', (300, 200), color=(73, 109, 137))
    else:
        response_text = str(result)

    # Convert response to audio (mocked)
    audio_data, sr = text_to_speech(response_text)

    return (sr, audio_data), response_text, image

# ------ Hugging Face Space UI ------
with gr.Blocks(title="MCP Culinary Voice Assistant") as demo:
    gr.Markdown("# 🧑‍🍳 MCP-Powered Culinary Voice Assistant (Open-Source)")
    gr.Markdown("Speak to your cooking assistant about recipes, conversions, and more!")
    
    # Top row: microphone input and synthesized audio reply.
    # NOTE(review): gr.Audio(source=...) is Gradio 3.x API; Gradio 4.x renamed
    # it to sources=[...] — confirm the pinned gradio version.
    with gr.Row():
        audio_input = gr.Audio(source="microphone", type="numpy", label="Speak to Chef Assistant")
        audio_output = gr.Audio(label="Assistant Response", interactive=False)
    
    # Middle row: text rendering of the response plus optional recipe image.
    with gr.Row():
        text_output = gr.Textbox(label="Transcription", interactive=False)
        image_output = gr.Image(label="Recipe Image", interactive=False)
    
    with gr.Row():
        submit_btn = gr.Button("Process Command", variant="primary")
    
    # Wire the button to the end-to-end pipeline defined above.
    submit_btn.click(
        fn=process_voice_command,
        inputs=[audio_input],
        outputs=[audio_output, text_output, image_output]
    )

    # NOTE(review): these examples target text_output, a non-interactive
    # textbox that is not an input of any event — presumably illustrative
    # only; verify they populate as intended in the running Space.
    gr.Examples(
        examples=[
            ["What can I make with eggs and flour?"],
            ["Show me how tomato soup looks"],
            ["Convert 2 cups to milliliters"]
        ],
        inputs=[text_output],
        label="Example Queries"
    )

if __name__ == "__main__":
    # Launch the Gradio app when run as a script (e.g. on a HF Space).
    demo.launch()