mgbam committed on
Commit
7cd9628
·
verified ·
1 Parent(s): 2677642

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -99
app.py CHANGED
@@ -1,26 +1,38 @@
1
- # MCP-Powered Culinary Voice Assistant
2
  # Hugging Face Space Implementation
3
 
4
  import gradio as gr
5
  import numpy as np
6
- from mcp.server.fastmcp import FastMCP
7
- from agents import Agent, trace
8
- from agents.mcp import MCPServerSse, MCPServerStdio
9
- from agents.voice import VoicePipeline, TTSModelSettings, AudioInput
10
  import sqlite3
11
  import json
12
  import requests
13
  from PIL import Image
14
  import io
 
15
 
16
- # ------ Custom MCP Cooking Tools Server ------
17
- mcp = FastMCP("Culinary Tools Server")
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- @mcp.tool()
20
- def get_recipe_by_ingredients(ingredients: list) -> dict:
 
 
 
21
  """Find recipes based on available ingredients"""
22
- print(f"[Culinary Server] Finding recipes with: {', '.join(ingredients)}")
23
- # In a real implementation, this would call a recipe API
24
  return {
25
  "recipes": [
26
  {"name": "Vegetable Stir Fry", "time": 20, "difficulty": "Easy"},
@@ -28,18 +40,16 @@ def get_recipe_by_ingredients(ingredients: list) -> dict:
28
  ]
29
  }
30
 
31
- @mcp.tool()
32
- def get_recipe_image(recipe_name: str) -> str:
33
  """Generate an image of the finished recipe"""
34
- print(f"[Culinary Server] Generating image for: {recipe_name}")
35
- # This would call DALL-E or Stable Diffusion in production
36
- return "https://example.com/recipe-image.jpg"
 
 
37
 
38
- @mcp.tool()
39
- def convert_measurements(amount: float, from_unit: str, to_unit: str) -> dict:
40
  """Convert cooking measurements between units"""
41
- print(f"[Culinary Server] Converting {amount} {from_unit} to {to_unit}")
42
- # Simple conversion logic - real implementation would handle more units
43
  conversions = {
44
  ("tbsp", "tsp"): lambda x: x * 3,
45
  ("cups", "ml"): lambda x: x * 240,
@@ -50,18 +60,17 @@ def convert_measurements(amount: float, from_unit: str, to_unit: str) -> dict:
50
  return {"result": conversions[conversion_key](amount), "unit": to_unit}
51
  return {"error": "Conversion not supported"}
52
 
53
- # ------ Recipe Database (SQLite) ------
54
  def init_recipe_db():
55
- conn = sqlite3.connect('file:recipes.db?mode=memory&cache=shared', uri=True)
56
  c = conn.cursor()
57
- c.execute('''CREATE TABLE IF NOT EXISTS recipes
58
  (id INTEGER PRIMARY KEY, name TEXT, ingredients TEXT, instructions TEXT, prep_time INT)''')
59
 
60
- # Sample recipes
61
  recipes = [
62
- ("Classic Pancakes", "['flour', 'eggs', 'milk', 'baking powder']",
63
  "1. Mix dry ingredients\n2. Add wet ingredients\n3. Cook on griddle", 15),
64
- ("Tomato Soup", "['tomatoes', 'onion', 'garlic', 'vegetable stock']",
65
  "1. Sauté onions\n2. Add tomatoes\n3. Simmer and blend", 30)
66
  ]
67
 
@@ -69,77 +78,106 @@ def init_recipe_db():
69
  conn.commit()
70
  return conn
71
 
72
- # ------ Voice Assistant Setup ------
73
- def create_culinary_agent(mcp_servers):
74
- """Create the culinary assistant agent"""
75
- culinary_agent = Agent(
76
- name="ChefAssistant",
77
- instructions="""
78
- You are a professional chef assistant. Help users with cooking tasks:
79
- 1. Use get_recipe_by_ingredients when users have specific ingredients
80
- 2. Use get_recipe_details for known recipes
81
- 3. Use convert_measurements for unit conversions
82
- 4. Use get_recipe_image when the user asks to see a dish
83
- 5. Keep responses concise and practical for kitchen use
84
- 6. Use a warm, encouraging tone suitable for cooking
85
- """,
86
- mcp_servers=mcp_servers,
87
- model="gpt-4.1-mini",
88
- )
89
- return culinary_agent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  # ------ Gradio Interface ------
92
- def process_voice_command(audio, state):
93
  """Process voice command through the agent system"""
94
- sr, audio_data = audio
95
- audio_array = (audio_data / np.iinfo(audio_data.dtype).max).astype(np.float32)
96
 
97
- # Initialize on first run
98
- if state is None:
99
- init_recipe_db()
100
- state = {
101
- "mcp_servers": [],
102
- "agent": None,
103
- "voice_pipeline": VoicePipeline(
104
- workflow=None,
105
- config=VoicePipelineConfig(
106
- tts_settings=TTSModelSettings(
107
- instructions="Warm, encouraging chef voice"
108
- )
109
- )
110
- )
111
- }
112
-
113
- # Start MCP servers
114
- with MCPServerSse(
115
- name="Culinary Tools",
116
- params={"url": "http://localhost:8000/sse"},
117
- client_session_timeout_seconds=15,
118
- ) as culinary_server:
119
- with MCPServerStdio(
120
- params={"command": "uvx", "args": ["mcp-server-sqlite", "--db-path", "file:recipes.db?mode=memory&cache=shared"]},
121
- ) as db_server:
122
- state["mcp_servers"] = [culinary_server, db_server]
123
- state["agent"] = create_culinary_agent(state["mcp_servers"])
124
 
125
- # Process audio through agent
126
- audio_input = AudioInput(buffer=audio_array, sample_rate=sr)
127
- response = state["voice_pipeline"].run(state["agent"], audio_input)
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
- # For demo purposes, return mock response
 
 
 
130
  return (
131
- "https://example.com/response.wav",
132
- "I found 3 recipes for your ingredients! Vegetable Stir Fry (20 mins) and Pasta Primavera (30 mins).",
133
- "https://example.com/stir-fry.jpg",
134
- state
135
  )
136
 
137
  # ------ Hugging Face Space UI ------
138
  with gr.Blocks(title="MCP Culinary Voice Assistant") as demo:
139
- state = gr.State(value=None)
140
-
141
- with gr.Row():
142
- gr.Markdown("# 🧑‍🍳 MCP-Powered Culinary Voice Assistant")
143
 
144
  with gr.Row():
145
  audio_input = gr.Audio(source="microphone", type="numpy", label="Speak to Chef Assistant")
@@ -154,26 +192,19 @@ with gr.Blocks(title="MCP Culinary Voice Assistant") as demo:
154
 
155
  submit_btn.click(
156
  fn=process_voice_command,
157
- inputs=[audio_input, state],
158
- outputs=[audio_output, text_output, image_output, state]
159
  )
160
 
161
  gr.Examples(
162
  examples=[
163
- ["What can I make with eggs and flour?", "", ""],
164
- ["Show me how tomato soup looks", "", ""],
165
- ["Convert 2 cups to milliliters", "", ""]
166
  ],
167
  inputs=[text_output],
168
  label="Example Queries"
169
  )
170
 
171
  if __name__ == "__main__":
172
- # Start MCP server in background thread
173
- import threading
174
- server_thread = threading.Thread(target=mcp.run, kwargs={"transport": "sse"})
175
- server_thread.daemon = True
176
- server_thread.start()
177
-
178
- # Launch Gradio interface
179
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
+ # MCP-Powered Voice Assistant with Open-Source Tools
2
  # Hugging Face Space Implementation
3
 
4
  import gradio as gr
5
  import numpy as np
 
 
 
 
6
  import sqlite3
7
  import json
8
  import requests
9
  from PIL import Image
10
  import io
11
+ import time
12
 
13
+ # ------ Mock MCP Server Implementation ------
14
class MockMCPServer:
    """In-process stand-in for an MCP server: a registry of named callables.

    Registered tools live in ``self.tools``, keyed by tool name. Each entry
    is a dict with the keys ``"function"`` (the callable) and
    ``"description"`` (the text supplied at registration).
    """

    def __init__(self):
        # Maps tool name -> {"function": callable, "description": ...}.
        self.tools = {}

    def register_tool(self, name, func, description):
        """Register ``func`` under ``name``.

        Registering a name that already exists overwrites the earlier entry.
        """
        self.tools[name] = {
            "function": func,
            "description": description,
        }

    def call_tool(self, tool_name, params=None):
        """Invoke a registered tool with keyword arguments.

        Args:
            tool_name: Name the tool was registered under.
            params: Mapping of keyword arguments forwarded to the tool.
                ``None`` (the default) means "call with no arguments".

        Returns:
            The tool's own return value, or ``{"error": ...}`` when no tool
            is registered under ``tool_name``.
        """
        if tool_name not in self.tools:
            return {"error": f"Tool {tool_name} not found"}
        # ``**None`` raises TypeError, so normalise a missing mapping first.
        return self.tools[tool_name]["function"](**(params or {}))
28
 
29
+ # ------ Create Mock MCP Server ------
30
+ mcp_server = MockMCPServer()
31
+
32
+ # ------ Tool Implementations ------
33
+ def get_recipe_by_ingredients(ingredients):
34
  """Find recipes based on available ingredients"""
35
+ # In a real implementation, this would call an API
 
36
  return {
37
  "recipes": [
38
  {"name": "Vegetable Stir Fry", "time": 20, "difficulty": "Easy"},
 
40
  ]
41
  }
42
 
43
def get_recipe_image(recipe_name):
    """Return placeholder image metadata for ``recipe_name``.

    The URL is a fixed stand-in; in production this would call an image
    model such as Stable Diffusion. The result has two keys: ``image_url``
    and ``alt_text``.
    """
    alt_text = f"Image of {recipe_name}"
    image_info = dict(
        image_url="https://example.com/recipe-image.jpg",
        alt_text=alt_text,
    )
    return image_info
50
 
51
+ def convert_measurements(amount, from_unit, to_unit):
 
52
  """Convert cooking measurements between units"""
 
 
53
  conversions = {
54
  ("tbsp", "tsp"): lambda x: x * 3,
55
  ("cups", "ml"): lambda x: x * 240,
 
60
  return {"result": conversions[conversion_key](amount), "unit": to_unit}
61
  return {"error": "Conversion not supported"}
62
 
63
+ # ------ Recipe Database ------
64
  def init_recipe_db():
65
+ conn = sqlite3.connect(':memory:')
66
  c = conn.cursor()
67
+ c.execute('''CREATE TABLE recipes
68
  (id INTEGER PRIMARY KEY, name TEXT, ingredients TEXT, instructions TEXT, prep_time INT)''')
69
 
 
70
  recipes = [
71
+ ("Classic Pancakes", json.dumps(["flour", "eggs", "milk", "baking powder"]),
72
  "1. Mix dry ingredients\n2. Add wet ingredients\n3. Cook on griddle", 15),
73
+ ("Tomato Soup", json.dumps(["tomatoes", "onion", "garlic", "vegetable stock"]),
74
  "1. Sauté onions\n2. Add tomatoes\n3. Simmer and blend", 30)
75
  ]
76
 
 
78
  conn.commit()
79
  return conn
80
 
81
+ # ------ Voice Processing Functions ------
82
def text_to_speech(text):
    """Mock TTS: log ``text`` and hand back silent audio.

    Replace with a real TTS engine for actual use.

    Returns:
        A ``(samples, sample_rate)`` pair: 16000 float32 zeros and the
        rate 16000.
    """
    sample_rate = 16000
    print(f"[TTS]: {text}")
    # Dummy audio: as many zero samples as the sample rate.
    silence = np.zeros(sample_rate, dtype=np.float32)
    return silence, sample_rate
87
+
88
def speech_to_text(audio):
    """Mock STT: ignore ``audio`` and return a fixed transcript.

    Replace with a real speech-to-text engine for actual use.
    """
    transcript = "Show me pancake recipes"
    return transcript
92
+
93
+ # ------ Agent Logic ------
94
def process_query(query, db_conn):
    """Route ``query`` to a tool by keyword, else search the recipe table.

    Keyword checks are case-insensitive substring matches, tried in order:
    recipe/make, then image/show, then convert. Tool arguments are
    hard-coded placeholders rather than values parsed from the query.

    Args:
        query: The user's request as text.
        db_conn: Open SQLite connection holding a ``recipes`` table.

    Returns:
        The selected tool's result, or the list of ``recipes`` rows whose
        name contains ``query`` when no keyword matches.
    """
    lowered = query.lower()

    def mentions(*keywords):
        # True when any keyword occurs in the lower-cased query.
        return any(keyword in lowered for keyword in keywords)

    if mentions("recipe", "make"):
        pantry = ["flour", "eggs"]  # Simplified extraction
        return mcp_server.call_tool(
            "get_recipe_by_ingredients",
            {"ingredients": pantry},
        )

    if mentions("image", "show"):
        dish = "Classic Pancakes"  # Simplified extraction
        return mcp_server.call_tool(
            "get_recipe_image",
            {"recipe_name": dish},
        )

    if mentions("convert"):
        # Simplified extraction
        return mcp_server.call_tool(
            "convert_measurements",
            {"amount": 2, "from_unit": "cups", "to_unit": "ml"},
        )

    # No keyword matched: fall back to a name search in the database.
    cursor = db_conn.cursor()
    cursor.execute("SELECT * FROM recipes WHERE name LIKE ?", (f"%{query}%",))
    return cursor.fetchall()
121
+
122
+ # ------ Register Tools with MCP Server ------
123
# Register each tool implementation on the mock server under its public name.
mcp_server.register_tool(
    name="get_recipe_by_ingredients",
    func=get_recipe_by_ingredients,
    description="Find recipes based on available ingredients",
)
mcp_server.register_tool(
    name="get_recipe_image",
    func=get_recipe_image,
    description="Generate an image of the finished recipe",
)
mcp_server.register_tool(
    name="convert_measurements",
    func=convert_measurements,
    description="Convert cooking measurements between units",
)

# ------ Initialize System ------
# Module-level connection used by the Gradio callback for recipe lookups.
db_conn = init_recipe_db()
141
 
142
  # ------ Gradio Interface ------
143
def process_voice_command(audio):
    """Run one voice interaction: transcribe, answer, and synthesise a reply.

    Args:
        audio: The recording from the Gradio audio input; passed unchanged
            to ``speech_to_text``.

    Returns:
        A 3-tuple ``((sample_rate, samples), response_text, image)`` for the
        audio, text and image outputs. ``image`` is ``None`` unless the
        query produced an image result.
    """
    # Convert audio to text, then let the agent logic pick a tool / DB search.
    query = speech_to_text(audio)
    result = process_query(query, db_conn)

    # Bind the image up front so every branch leaves it defined; previously
    # it was only set in some branches and recovered via locals().
    image = None

    if isinstance(result, list) and result:
        # Database rows from ``SELECT * FROM recipes``: index 1 is the name
        # column and index 4 is prep_time.
        lines = [f"Found {len(result)} recipes:"]
        lines.extend(f"- {row[1]} ({row[4]} mins)" for row in result)
        response_text = "\n".join(lines) + "\n"
    elif isinstance(result, dict) and "recipes" in result:
        recipes = result["recipes"]
        lines = [f"Found {len(recipes)} recipes:"]
        lines.extend(f"- {recipe['name']} ({recipe['time']} mins)" for recipe in recipes)
        response_text = "\n".join(lines) + "\n"
    elif isinstance(result, dict) and "image_url" in result:
        response_text = f"Here's an image of {result.get('alt_text', 'the recipe')}"
        # Placeholder solid-colour picture; the URL in ``result`` is not fetched.
        image = Image.new('RGB', (300, 200), color=(73, 109, 137))
    else:
        # Anything else (conversion results, errors, empty row lists) is
        # reported via its string form.
        response_text = str(result)

    # Convert response to audio.
    audio_data, sr = text_to_speech(response_text)

    return (sr, audio_data), response_text, image
176
 
177
  # ------ Hugging Face Space UI ------
178
  with gr.Blocks(title="MCP Culinary Voice Assistant") as demo:
179
+ gr.Markdown("# 🧑‍🍳 MCP-Powered Culinary Voice Assistant (Open-Source)")
180
+ gr.Markdown("Speak to your cooking assistant about recipes, conversions, and more!")
 
 
181
 
182
  with gr.Row():
183
  audio_input = gr.Audio(source="microphone", type="numpy", label="Speak to Chef Assistant")
 
192
 
193
  submit_btn.click(
194
  fn=process_voice_command,
195
+ inputs=[audio_input],
196
+ outputs=[audio_output, text_output, image_output]
197
  )
198
 
199
  gr.Examples(
200
  examples=[
201
+ ["What can I make with eggs and flour?"],
202
+ ["Show me how tomato soup looks"],
203
+ ["Convert 2 cups to milliliters"]
204
  ],
205
  inputs=[text_output],
206
  label="Example Queries"
207
  )
208
 
209
  if __name__ == "__main__":
210
+ demo.launch()