# Spaces: Sleeping
# MCP-Powered Voice Assistant with Open-Source Tools
# Hugging Face Space Implementation
import io
import json
import sqlite3
import time

import gradio as gr
import numpy as np
import requests
from PIL import Image
# ------ Mock MCP Server Implementation ------
class MockMCPServer:
    """In-process stand-in for an MCP server: a registry of named tools.

    Each tool is stored under its name as a dict holding the callable
    (key ``"function"``) and a human-readable ``"description"``.
    """

    def __init__(self):
        # Maps tool name -> {"function": callable, "description": str}.
        self.tools = {}

    def register_tool(self, name, func, description):
        """Register ``func`` under ``name``, replacing any earlier entry."""
        self.tools[name] = {
            "function": func,
            "description": description,
        }

    def call_tool(self, tool_name, params=None):
        """Invoke a registered tool with keyword arguments.

        Args:
            tool_name: Name the tool was registered under.
            params: Mapping of keyword arguments passed to the tool.
                ``None`` (or an empty mapping) means "no arguments".

        Returns:
            Whatever the tool returns, or
            ``{"error": "Tool <tool_name> not found"}`` when nothing is
            registered under ``tool_name``.
        """
        tool = self.tools.get(tool_name)
        if tool is None:
            return {"error": f"Tool {tool_name} not found"}
        # `**None` would raise TypeError, so fall back to an empty mapping.
        return tool["function"](**(params or {}))
# ------ Create Mock MCP Server ------
# Module-level registry instance: the tool registrations further down add to
# it and process_query dispatches through it.
mcp_server = MockMCPServer()
# ------ Tool Implementations ------
def get_recipe_by_ingredients(ingredients):
    """Find recipes based on available ingredients.

    Mock implementation: ``ingredients`` is accepted so the signature matches
    a real lookup, but it is not consulted; the same two placeholder recipes
    are returned on every call.

    Args:
        ingredients: Ingredients the user has on hand (currently unused).

    Returns:
        dict: ``{"recipes": [...]}`` where each entry has ``name``, ``time``
        and ``difficulty`` keys.
    """
    # In a real implementation, this would call an API
    return {
        "recipes": [
            {"name": "Vegetable Stir Fry", "time": 20, "difficulty": "Easy"},
            {"name": "Pasta Primavera", "time": 30, "difficulty": "Medium"},
        ]
    }
def get_recipe_image(recipe_name):
    """Generate an image of the finished recipe.

    Mock implementation: no image is generated. A fixed placeholder URL is
    returned together with alt text derived from ``recipe_name``.

    Args:
        recipe_name: Name of the recipe to illustrate.

    Returns:
        dict: ``{"image_url": <placeholder URL>, "alt_text": "Image of <name>"}``.
    """
    # In production, this would call a model like Stable Diffusion
    return {
        "image_url": "https://example.com/recipe-image.jpg",
        "alt_text": f"Image of {recipe_name}",
    }
def convert_measurements(amount, from_unit, to_unit):
    """Convert cooking measurements between units.

    Supported pairs, in either direction: tbsp <-> tsp, cups <-> ml and
    oz <-> g. Unit names are matched case-insensitively and surrounding
    whitespace is ignored.

    Args:
        amount: Numeric quantity expressed in ``from_unit``.
        from_unit: Unit to convert from.
        to_unit: Unit to convert to.

    Returns:
        dict: ``{"result": <converted amount>, "unit": to_unit}`` on success
        (``to_unit`` is echoed back exactly as passed), or
        ``{"error": "Conversion not supported"}`` for an unknown pair.
    """
    # Multiplicative factor for each forward conversion.
    factors = {
        ("tbsp", "tsp"): 3,
        ("cups", "ml"): 240,
        ("oz", "g"): 28.35,
    }
    source = from_unit.strip().lower()
    target = to_unit.strip().lower()

    if (source, target) in factors:
        return {"result": amount * factors[(source, target)], "unit": to_unit}
    # The reverse direction divides by the same factor.
    if (target, source) in factors:
        return {"result": amount / factors[(target, source)], "unit": to_unit}
    return {"error": "Conversion not supported"}
# ------ Recipe Database ------
def init_recipe_db():
    """Create an in-memory SQLite database seeded with two sample recipes.

    The ``recipes`` table has columns ``id``, ``name``, ``ingredients``
    (a JSON-encoded list of strings), ``instructions`` and ``prep_time``.

    The connection is opened with ``check_same_thread=False``: it is created
    once at module import but queried from UI event handlers, and sqlite3's
    default same-thread check raises ``ProgrammingError`` whenever a handler
    runs on a different thread from the one that opened the connection.
    Only reads happen after seeding; any future writes through this shared
    connection must be serialised by the caller.

    Returns:
        sqlite3.Connection: Open connection to the seeded database.
    """
    conn = sqlite3.connect(":memory:", check_same_thread=False)
    conn.execute(
        '''CREATE TABLE recipes
        (id INTEGER PRIMARY KEY, name TEXT, ingredients TEXT, instructions TEXT, prep_time INT)'''
    )
    recipes = [
        (
            "Classic Pancakes",
            json.dumps(["flour", "eggs", "milk", "baking powder"]),
            "1. Mix dry ingredients\n2. Add wet ingredients\n3. Cook on griddle",
            15,
        ),
        (
            "Tomato Soup",
            json.dumps(["tomatoes", "onion", "garlic", "vegetable stock"]),
            "1. Sauté onions\n2. Add tomatoes\n3. Simmer and blend",
            30,
        ),
    ]
    conn.executemany(
        "INSERT INTO recipes (name, ingredients, instructions, prep_time) VALUES (?,?,?,?)",
        recipes,
    )
    conn.commit()
    return conn
# ------ Voice Processing Functions ------
def text_to_speech(text, sample_rate=16000):
    """Mock TTS function - in real use, replace with actual TTS.

    Prints ``text`` to stdout with a ``[TTS]:`` prefix and returns silence
    instead of synthesised speech.

    Args:
        text: Text that a real implementation would speak.
        sample_rate: Sample rate in Hz of the returned buffer; the buffer
            holds ``sample_rate`` samples, i.e. one second of audio.

    Returns:
        tuple: ``(audio, sample_rate)`` where ``audio`` is a 1-D float32
        NumPy array of zeros.
    """
    print(f"[TTS]: {text}")
    # Return dummy audio data
    return np.zeros(sample_rate, dtype=np.float32), sample_rate
def speech_to_text(audio):
    """Mock STT function - in real use, replace with actual STT.

    Args:
        audio: Recorded audio to transcribe (ignored by this mock).

    Returns:
        str: Always the fixed phrase ``"Show me pancake recipes"``.
    """
    # Return dummy text
    return "Show me pancake recipes"
# ------ Agent Logic ------
def process_query(query, db_conn):
    """Process user query using the available tools.

    Routing is keyword-based on the lower-cased query and checked in this
    order (first match wins):

    1. contains "recipe" or "make"  -> ``get_recipe_by_ingredients`` tool
    2. contains "image" or "show"   -> ``get_recipe_image`` tool
    3. contains "convert"           -> ``convert_measurements`` tool
    4. anything else                -> substring search on ``recipes.name``

    Tool arguments are hard-coded placeholders; nothing is extracted from
    the query text yet.

    Args:
        query: The user's request as text. ``None`` is treated as an empty
            string, which matches no keyword and falls through to the
            database search.
        db_conn: Open database connection exposing a ``recipes`` table.

    Returns:
        The tool's result dict for routes 1-3, or a list of ``recipes`` row
        tuples for the database fallback.
    """
    query = query or ""
    text = query.lower()

    # Simple intent recognition
    if "recipe" in text or "make" in text:
        # Extract ingredients
        ingredients = ["flour", "eggs"]  # Simplified extraction
        return mcp_server.call_tool(
            "get_recipe_by_ingredients",
            {"ingredients": ingredients},
        )

    if "image" in text or "show" in text:
        recipe_name = "Classic Pancakes"  # Simplified extraction
        return mcp_server.call_tool(
            "get_recipe_image",
            {"recipe_name": recipe_name},
        )

    if "convert" in text:
        # Simplified extraction
        return mcp_server.call_tool(
            "convert_measurements",
            {"amount": 2, "from_unit": "cups", "to_unit": "ml"},
        )

    # Fallback to database search
    c = db_conn.cursor()
    c.execute("SELECT * FROM recipes WHERE name LIKE ?", (f"%{query}%",))
    return c.fetchall()
# ------ Register Tools with MCP Server ------
# The registered names must match the tool names process_query dispatches to.
mcp_server.register_tool(
    "get_recipe_by_ingredients",
    get_recipe_by_ingredients,
    "Find recipes based on available ingredients",
)
mcp_server.register_tool(
    "get_recipe_image",
    get_recipe_image,
    "Generate an image of the finished recipe",
)
mcp_server.register_tool(
    "convert_measurements",
    convert_measurements,
    "Convert cooking measurements between units",
)

# ------ Initialize System ------
# Single shared connection, created at import time and read by
# process_voice_command on every request.
db_conn = init_recipe_db()
# ------ Gradio Interface ------
def process_voice_command(audio):
    """Process voice command through the agent system.

    Pipeline: transcribe ``audio`` -> route the text through
    ``process_query`` -> render the result as text -> synthesise the text.

    Args:
        audio: Audio captured by the UI's microphone component.

    Returns:
        tuple: ``((sample_rate, audio_array), response_text, image)`` where
        ``image`` is a PIL image for image results and ``None`` otherwise.
    """
    # Convert audio to text
    query = speech_to_text(audio)
    # Process query using agent logic
    result = process_query(query, db_conn)

    # Only the image branch produces a picture; every other branch returns None.
    image = None

    # Generate response text
    if isinstance(result, list):
        # Database fallback: rows are (id, name, ingredients, instructions, prep_time).
        if result:
            lines = [f"- {row[1]} ({row[4]} mins)\n" for row in result]
            response_text = f"Found {len(result)} recipes:\n" + "".join(lines)
        else:
            # An empty match list used to be rendered as the literal "[]".
            response_text = "No matching recipes found."
    elif isinstance(result, dict) and "recipes" in result:
        recipes = result["recipes"]
        lines = [f"- {recipe['name']} ({recipe['time']} mins)\n" for recipe in recipes]
        response_text = f"Found {len(recipes)} recipes:\n" + "".join(lines)
    elif isinstance(result, dict) and "image_url" in result:
        # alt_text already reads "Image of <name>", so it is not prefixed with
        # "an image of" again.
        alt_text = result.get("alt_text")
        if alt_text:
            response_text = f"Here's the recipe image: {alt_text}"
        else:
            response_text = "Here's an image of the recipe"
        # Solid-colour placeholder; the image_url itself is not fetched.
        image = Image.new('RGB', (300, 200), color=(73, 109, 137))
    else:
        response_text = str(result)

    # Convert response to audio
    audio_data, sr = text_to_speech(response_text)

    # Return results
    return (sr, audio_data), response_text, image
# ------ Hugging Face Space UI ------
# Layout: one row for audio in/out, one row for text and image output, one
# row for the submit button. The button runs process_voice_command on the
# recorded audio and fills all three output components.
with gr.Blocks(title="MCP Culinary Voice Assistant") as demo:
    gr.Markdown("# 🧑🍳 MCP-Powered Culinary Voice Assistant (Open-Source)")
    gr.Markdown("Speak to your cooking assistant about recipes, conversions, and more!")

    with gr.Row():
        # NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4+ expects
        # sources=["microphone"]. Confirm against the Space's pinned version.
        audio_input = gr.Audio(source="microphone", type="numpy", label="Speak to Chef Assistant")
        audio_output = gr.Audio(label="Assistant Response", interactive=False)

    with gr.Row():
        # Shows the assistant's response text (third element order: audio, text, image).
        text_output = gr.Textbox(label="Transcription", interactive=False)
        image_output = gr.Image(label="Recipe Image", interactive=False)

    with gr.Row():
        submit_btn = gr.Button("Process Command", variant="primary")

    submit_btn.click(
        fn=process_voice_command,
        inputs=[audio_input],
        outputs=[audio_output, text_output, image_output],
    )

    # NOTE(review): these examples target text_output, which is not an input
    # of any handler, so selecting one only fills that box and does not run
    # the assistant.
    gr.Examples(
        examples=[
            ["What can I make with eggs and flour?"],
            ["Show me how tomato soup looks"],
            ["Convert 2 cups to milliliters"],
        ],
        inputs=[text_output],
        label="Example Queries",
    )

if __name__ == "__main__":
    demo.launch()