mgbam committed on
Commit
1155ca4
·
verified ·
1 Parent(s): d31b137

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -64
app.py CHANGED
@@ -1,14 +1,9 @@
1
- # MCP-Powered Voice Assistant with Open-Source Tools
2
- # Hugging Face Space Implementation
3
-
4
  import gradio as gr
5
  import numpy as np
6
  import sqlite3
7
  import json
8
- import requests
9
- from PIL import Image
10
- import io
11
  import time
 
12
 
13
  # ------ Mock MCP Server Implementation ------
14
  class MockMCPServer:
@@ -33,6 +28,7 @@ mcp_server = MockMCPServer()
33
  def get_recipe_by_ingredients(ingredients):
34
  """Find recipes based on available ingredients"""
35
  # In a real implementation, this would call an API
 
36
  return {
37
  "recipes": [
38
  {"name": "Vegetable Stir Fry", "time": 20, "difficulty": "Easy"},
@@ -42,14 +38,16 @@ def get_recipe_by_ingredients(ingredients):
42
 
43
  def get_recipe_image(recipe_name):
44
  """Generate an image of the finished recipe"""
45
- # In production, this would call a model like Stable Diffusion
46
- return {
47
- "image_url": "https://example.com/recipe-image.jpg",
48
- "alt_text": f"Image of {recipe_name}"
49
- }
 
50
 
51
  def convert_measurements(amount, from_unit, to_unit):
52
  """Convert cooking measurements between units"""
 
53
  conversions = {
54
  ("tbsp", "tsp"): lambda x: x * 3,
55
  ("cups", "ml"): lambda x: x * 240,
@@ -57,7 +55,8 @@ def convert_measurements(amount, from_unit, to_unit):
57
  }
58
  conversion_key = (from_unit.lower(), to_unit.lower())
59
  if conversion_key in conversions:
60
- return {"result": conversions[conversion_key](amount), "unit": to_unit}
 
61
  return {"error": "Conversion not supported"}
62
 
63
  # ------ Recipe Database ------
@@ -71,7 +70,9 @@ def init_recipe_db():
71
  ("Classic Pancakes", json.dumps(["flour", "eggs", "milk", "baking powder"]),
72
  "1. Mix dry ingredients\n2. Add wet ingredients\n3. Cook on griddle", 15),
73
  ("Tomato Soup", json.dumps(["tomatoes", "onion", "garlic", "vegetable stock"]),
74
- "1. Sauté onions\n2. Add tomatoes\n3. Simmer and blend", 30)
 
 
75
  ]
76
 
77
  c.executemany("INSERT INTO recipes (name, ingredients, instructions, prep_time) VALUES (?,?,?,?)", recipes)
@@ -82,42 +83,73 @@ def init_recipe_db():
82
  def text_to_speech(text):
83
  """Mock TTS function - in real use, replace with actual TTS"""
84
  print(f"[TTS]: {text}")
85
- # Return dummy audio data
86
- return np.zeros(16000, dtype=np.float32), 16000
 
 
 
87
 
88
  def speech_to_text(audio):
89
  """Mock STT function - in real use, replace with actual STT"""
90
- # Return dummy text
91
- return "Show me pancake recipes"
 
 
 
92
 
93
  # ------ Agent Logic ------
94
  def process_query(query, db_conn):
95
  """Process user query using the available tools"""
 
96
  # Simple intent recognition
97
- if "recipe" in query.lower() or "make" in query.lower():
98
- # Extract ingredients
99
- ingredients = ["flour", "eggs"] # Simplified extraction
100
- return mcp_server.call_tool(
101
- "get_recipe_by_ingredients",
102
- {"ingredients": ingredients}
103
- )
104
- elif "image" in query.lower() or "show" in query.lower():
105
- recipe_name = "Classic Pancakes" # Simplified extraction
106
- return mcp_server.call_tool(
107
- "get_recipe_image",
108
- {"recipe_name": recipe_name}
109
- )
 
 
 
 
 
 
 
 
 
 
110
  elif "convert" in query.lower():
111
- # Simplified extraction
112
- return mcp_server.call_tool(
113
- "convert_measurements",
114
- {"amount": 2, "from_unit": "cups", "to_unit": "ml"}
115
- )
 
 
 
 
 
 
 
 
 
 
116
  else:
117
  # Fallback to database search
118
  c = db_conn.cursor()
119
  c.execute("SELECT * FROM recipes WHERE name LIKE ?", (f"%{query}%",))
120
- return c.fetchall()
 
 
 
 
121
 
122
  # ------ Register Tools with MCP Server ------
123
  mcp_server.register_tool(
@@ -148,48 +180,57 @@ def process_voice_command(audio):
148
  # Process query using agent logic
149
  result = process_query(query, db_conn)
150
 
151
- # Generate response text
152
- if isinstance(result, list) and result:
153
- response_text = f"Found {len(result)} recipes:\n"
154
- for item in result:
155
- response_text += f"- {item[1]} ({item[4]} mins)\n"
156
- elif "recipes" in result:
157
- response_text = f"Found {len(result['recipes'])} recipes:\n"
158
- for recipe in result["recipes"]:
159
- response_text += f"- {recipe['name']} ({recipe['time']} mins)\n"
160
- elif "image_url" in result:
161
- response_text = f"Here's an image of {result.get('alt_text', 'the recipe')}"
162
- image = Image.new('RGB', (300, 200), color=(73, 109, 137))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  else:
164
- response_text = str(result)
165
- image = None
166
 
167
  # Convert response to audio
168
- audio_data, sr = text_to_speech(response_text)
169
 
170
- # Return results
171
- return (
172
- (sr, audio_data),
173
- response_text,
174
- image if 'image' in locals() else None
175
- )
176
 
177
  # ------ Hugging Face Space UI ------
178
  with gr.Blocks(title="MCP Culinary Voice Assistant") as demo:
179
- gr.Markdown("# 🧑‍🍳 MCP-Powered Culinary Voice Assistant (Open-Source)")
180
  gr.Markdown("Speak to your cooking assistant about recipes, conversions, and more!")
181
 
182
  with gr.Row():
183
- audio_input = gr.Audio(source="microphone", type="numpy", label="Speak to Chef Assistant")
184
- audio_output = gr.Audio(label="Assistant Response", interactive=False)
 
 
 
185
 
186
  with gr.Row():
187
  text_output = gr.Textbox(label="Transcription", interactive=False)
188
  image_output = gr.Image(label="Recipe Image", interactive=False)
189
 
190
- with gr.Row():
191
- submit_btn = gr.Button("Process Command", variant="primary")
192
-
193
  submit_btn.click(
194
  fn=process_voice_command,
195
  inputs=[audio_input],
@@ -200,7 +241,8 @@ with gr.Blocks(title="MCP Culinary Voice Assistant") as demo:
200
  examples=[
201
  ["What can I make with eggs and flour?"],
202
  ["Show me how tomato soup looks"],
203
- ["Convert 2 cups to milliliters"]
 
204
  ],
205
  inputs=[text_output],
206
  label="Example Queries"
 
 
 
 
1
  import gradio as gr
2
  import numpy as np
3
  import sqlite3
4
  import json
 
 
 
5
  import time
6
+ from PIL import Image, ImageDraw
7
 
8
  # ------ Mock MCP Server Implementation ------
9
  class MockMCPServer:
 
28
  def get_recipe_by_ingredients(ingredients):
29
  """Find recipes based on available ingredients"""
30
  # In a real implementation, this would call an API
31
+ print(f"Searching recipes with ingredients: {ingredients}")
32
  return {
33
  "recipes": [
34
  {"name": "Vegetable Stir Fry", "time": 20, "difficulty": "Easy"},
 
38
 
def get_recipe_image(recipe_name):
    """Build a placeholder picture labelled with the recipe name.

    Prints a log line, creates a 300x200 RGB image with a solid background
    and writes "Image of: <recipe_name>" near its top-left corner.

    Args:
        recipe_name: Text to embed in the placeholder image.

    Returns:
        The PIL image that was drawn on.
    """
    print(f"Generating image for: {recipe_name}")
    # No real image generation happens here: the "picture" is just a
    # coloured canvas carrying the recipe name as a caption.
    placeholder = Image.new('RGB', (300, 200), color=(73, 109, 137))
    ImageDraw.Draw(placeholder).text(
        (10, 10), f"Image of: {recipe_name}", fill=(255, 255, 0)
    )
    return placeholder
47
 
48
  def convert_measurements(amount, from_unit, to_unit):
49
  """Convert cooking measurements between units"""
50
+ print(f"Converting {amount} {from_unit} to {to_unit}")
51
  conversions = {
52
  ("tbsp", "tsp"): lambda x: x * 3,
53
  ("cups", "ml"): lambda x: x * 240,
 
55
  }
56
  conversion_key = (from_unit.lower(), to_unit.lower())
57
  if conversion_key in conversions:
58
+ result = conversions[conversion_key](amount)
59
+ return {"result": round(result, 2), "unit": to_unit}
60
  return {"error": "Conversion not supported"}
61
 
62
  # ------ Recipe Database ------
 
70
  ("Classic Pancakes", json.dumps(["flour", "eggs", "milk", "baking powder"]),
71
  "1. Mix dry ingredients\n2. Add wet ingredients\n3. Cook on griddle", 15),
72
  ("Tomato Soup", json.dumps(["tomatoes", "onion", "garlic", "vegetable stock"]),
73
+ "1. Sauté onions\n2. Add tomatoes\n3. Simmer and blend", 30),
74
+ ("Chocolate Cake", json.dumps(["flour", "sugar", "cocoa", "eggs", "milk"]),
75
+ "1. Mix dry ingredients\n2. Add wet ingredients\n3. Bake at 350°F", 45)
76
  ]
77
 
78
  c.executemany("INSERT INTO recipes (name, ingredients, instructions, prep_time) VALUES (?,?,?,?)", recipes)
 
def text_to_speech(text):
    """Mock TTS: log the text and hand back a clip of silence.

    Args:
        text: The sentence that a real TTS engine would speak; here it is
            only printed.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is a float32
        NumPy array of zeros covering two seconds at 44100 Hz.
    """
    print(f"[TTS]: {text}")
    rate_hz = 44100
    seconds = 2
    # All-zero samples == silence; stands in for synthesized speech.
    silence = np.zeros(int(seconds * rate_hz), dtype=np.float32)
    return (rate_hz, silence)
91
 
def speech_to_text(audio):
    """Mock STT function - in real use, replace with actual STT.

    The audio content is never analysed; the function only logs what it
    received and returns a fixed demo transcription.

    Args:
        audio: Either ``None`` (no recording was supplied) or a
            ``(sample_rate, audio_data)`` pair.

    Returns:
        The fixed demo query string.
    """
    if audio is None:
        # Unpacking None would raise TypeError; treat "nothing recorded"
        # as a loggable condition instead of crashing the handler.
        print("Received no audio input")
    else:
        sample_rate, audio_data = audio
        # getattr keeps the log line working for inputs without a .shape
        # attribute (e.g. a plain list of samples).
        shape = getattr(audio_data, "shape", None)
        print(f"Received audio with sample rate {sample_rate} and shape {shape}")
    # Return a fixed response for demo
    return "What can I make with eggs and flour?"
99
 
100
  # ------ Agent Logic ------
# Spelled-out unit names mapped to the abbreviations that the
# "convert_measurements" tool is called with (tbsp, tsp, cups, ml).
_UNIT_ALIASES = {
    "cup": "cups",
    "tablespoon": "tbsp",
    "tablespoons": "tbsp",
    "teaspoon": "tsp",
    "teaspoons": "tsp",
    "milliliter": "ml",
    "milliliters": "ml",
    "millilitre": "ml",
    "millilitres": "ml",
}


def _parse_conversion(text):
    """Extract (amount, from_unit, to_unit) from 'convert <number> <unit> to <unit>'.

    Falls back to ``(2, "cups", "ml")`` when the text does not follow that
    pattern, which keeps the demo responsive for loosely phrased requests.
    """
    # Tokenise the lower-cased text so "Convert ..." matches, and drop
    # trailing punctuation so "ml?" is read as "ml".
    words = [word.rstrip("?!.,") for word in text.lower().split()]
    try:
        start = words.index("convert")
        amount = float(words[start + 1])
        from_unit = words[start + 2]
        to_unit = words[words.index("to", start) + 1]
    except (ValueError, IndexError):
        # ValueError: keyword missing as a standalone word or amount is not
        # a number; IndexError: the sentence ends too early.
        return 2, "cups", "ml"
    return (
        amount,
        _UNIT_ALIASES.get(from_unit, from_unit),
        _UNIT_ALIASES.get(to_unit, to_unit),
    )


def process_query(query, db_conn):
    """Process user query using the available tools.

    Intent is detected by substring matching on the lower-cased query, in
    this order: recipe search ("recipe"/"make"/"cook"), recipe image
    ("image"/"show"/"look"), unit conversion ("convert"); anything else is
    looked up by name in the ``recipes`` table.

    Args:
        query: The user's request as text.
        db_conn: Database connection; only ``cursor()`` is used, and only
            for the fallback name search.

    Returns:
        A dict with a ``"type"`` key ("recipes", "image", "conversion" or
        "db_recipes") and a ``"data"`` key holding either the tool's result
        or the list of matching database rows.
    """
    print(f"Processing query: {query}")
    text = query.lower()

    # Simple intent recognition
    if any(keyword in text for keyword in ("recipe", "make", "cook")):
        # Very simple ingredient extraction: keep the known keywords that
        # appear in the query, or fall back to a default pair.
        known = ("eggs", "flour", "milk", "tomatoes", "onion", "garlic")
        ingredients = [word for word in known if word in text] or ["eggs", "flour"]
        return {
            "type": "recipes",
            "data": mcp_server.call_tool("get_recipe_by_ingredients", {"ingredients": ingredients}),
        }

    if any(keyword in text for keyword in ("image", "show", "look")):
        # First known recipe name mentioned in the query wins.
        candidates = ("pancakes", "stir fry", "tomato soup", "chocolate cake")
        recipe_name = next((name for name in candidates if name in text), "Classic Pancakes")
        return {
            "type": "image",
            "data": mcp_server.call_tool("get_recipe_image", {"recipe_name": recipe_name}),
        }

    if "convert" in text:
        amount, from_unit, to_unit = _parse_conversion(text)
        return {
            "type": "conversion",
            "data": mcp_server.call_tool(
                "convert_measurements",
                {"amount": amount, "from_unit": from_unit, "to_unit": to_unit},
            ),
        }

    # Fallback to database search (parameterized, so the query text is
    # never interpolated into the SQL itself).
    c = db_conn.cursor()
    c.execute("SELECT * FROM recipes WHERE name LIKE ?", (f"%{query}%",))
    return {
        "type": "db_recipes",
        "data": c.fetchall(),
    }
153
 
154
  # ------ Register Tools with MCP Server ------
155
  mcp_server.register_tool(
 
180
  # Process query using agent logic
181
  result = process_query(query, db_conn)
182
 
183
+ # Generate response text and image
184
+ response_text = ""
185
+ image = None
186
+
187
+ if result["type"] == "recipes":
188
+ recipes = result["data"]["recipes"]
189
+ response_text = f"Found {len(recipes)} recipes:\n"
190
+ for recipe in recipes:
191
+ response_text += f"- {recipe['name']} ({recipe['time']} mins, {recipe['difficulty']})\n"
192
+ elif result["type"] == "image":
193
+ image = result["data"] # This is a PIL image
194
+ response_text = "Here is an image of the recipe!"
195
+ elif result["type"] == "conversion":
196
+ conv = result["data"]
197
+ if "error" in conv:
198
+ response_text = f"Error: {conv['error']}"
199
+ else:
200
+ response_text = f"{conv['result']} {conv['unit']}"
201
+ elif result["type"] == "db_recipes":
202
+ recipes = result["data"]
203
+ if recipes:
204
+ response_text = f"Found {len(recipes)} recipes in database:\n"
205
+ for recipe in recipes:
206
+ response_text += f"- {recipe[1]} ({recipe[4]} mins)\n"
207
+ else:
208
+ response_text = "No recipes found."
209
  else:
210
+ response_text = "I'm not sure how to help with that."
 
211
 
212
  # Convert response to audio
213
+ sr, audio_data = text_to_speech(response_text)
214
 
215
+ # Return results: audio output, text, and image
216
+ return (sr, audio_data), response_text, image
 
 
 
 
217
 
218
  # ------ Hugging Face Space UI ------
219
  with gr.Blocks(title="MCP Culinary Voice Assistant") as demo:
220
+ gr.Markdown("# 🧑‍🍳 MCP-Powered Culinary Voice Assistant")
221
  gr.Markdown("Speak to your cooking assistant about recipes, conversions, and more!")
222
 
223
  with gr.Row():
224
+ with gr.Column():
225
+ audio_input = gr.Audio(source="microphone", type="numpy", label="Speak to Chef Assistant")
226
+ submit_btn = gr.Button("Process Command", variant="primary")
227
+ with gr.Column():
228
+ audio_output = gr.Audio(label="Assistant Response", interactive=False)
229
 
230
  with gr.Row():
231
  text_output = gr.Textbox(label="Transcription", interactive=False)
232
  image_output = gr.Image(label="Recipe Image", interactive=False)
233
 
 
 
 
234
  submit_btn.click(
235
  fn=process_voice_command,
236
  inputs=[audio_input],
 
241
  examples=[
242
  ["What can I make with eggs and flour?"],
243
  ["Show me how tomato soup looks"],
244
+ ["Convert 2 cups to milliliters"],
245
+ ["Find chocolate cake recipes"]
246
  ],
247
  inputs=[text_output],
248
  label="Example Queries"