Spaces:
Sleeping
Sleeping
File size: 7,099 Bytes
7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 2677642 7cd9628 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
# MCP-Powered Voice Assistant with Open-Source Tools
# Hugging Face Space Implementation
import gradio as gr
import numpy as np
import sqlite3
import json
import requests
from PIL import Image
import io
import time
# ------ Mock MCP Server Implementation ------
class MockMCPServer:
    """Minimal in-process stand-in for an MCP server.

    Keeps a registry mapping tool names to a callable plus a description,
    and dispatches calls to the registered callables.
    """

    def __init__(self):
        # Maps tool name -> {"function": callable, "description": str}.
        self.tools = {}

    def register_tool(self, name, func, description):
        """Register ``func`` under ``name``.

        Registering an existing name replaces the previous entry.
        """
        self.tools[name] = {
            "function": func,
            "description": description,
        }

    def call_tool(self, tool_name, params=None):
        """Invoke a registered tool with keyword arguments.

        Args:
            tool_name: Name the tool was registered under.
            params: Mapping of keyword arguments forwarded to the tool.
                ``None`` (or an empty mapping) calls the tool with no
                arguments.

        Returns:
            Whatever the tool returns, or ``{"error": ...}`` when nothing
            is registered under ``tool_name``.
        """
        tool = self.tools.get(tool_name)
        if tool is None:
            return {"error": f"Tool {tool_name} not found"}
        # `params or {}` keeps zero-argument tools callable without forcing
        # callers to pass a dummy dict.
        return tool["function"](**(params or {}))
# ------ Create Mock MCP Server ------
# Module-level registry instance: the tools are registered on it further down
# and process_query() dispatches through it.
mcp_server = MockMCPServer()
# ------ Tool Implementations ------
def get_recipe_by_ingredients(ingredients):
    """Return candidate recipes for the given ingredients.

    Placeholder implementation: ``ingredients`` is not consulted and the
    same two canned recipes are always returned. A real implementation
    would call a recipe API here.
    """
    stir_fry = dict(name="Vegetable Stir Fry", time=20, difficulty="Easy")
    primavera = dict(name="Pasta Primavera", time=30, difficulty="Medium")
    return {"recipes": [stir_fry, primavera]}
def get_recipe_image(recipe_name):
    """Return image metadata for the finished recipe.

    Placeholder implementation: the URL is a fixed example address; only
    the alt text depends on ``recipe_name``. In production this would call
    an image-generation model.
    """
    metadata = {"image_url": "https://example.com/recipe-image.jpg"}
    metadata["alt_text"] = f"Image of {recipe_name}"
    return metadata
def convert_measurements(amount, from_unit, to_unit):
    """Convert a cooking measurement from one unit to another.

    Unit names are matched case-insensitively. Only tbsp->tsp, cups->ml
    and oz->g are known; any other pair yields an error dict.

    Returns:
        ``{"result": <converted amount>, "unit": to_unit}`` on success
        (``to_unit`` is echoed back exactly as passed), otherwise
        ``{"error": "Conversion not supported"}``.
    """
    # Multiplicative factor for each supported (source, target) pair.
    factors = {
        ("tbsp", "tsp"): 3,
        ("cups", "ml"): 240,
        ("oz", "g"): 28.35,
    }
    factor = factors.get((from_unit.lower(), to_unit.lower()))
    if factor is None:
        return {"error": "Conversion not supported"}
    return {"result": amount * factor, "unit": to_unit}
# ------ Recipe Database ------
def init_recipe_db():
    """Create and seed the in-memory recipe database.

    Returns:
        An open ``sqlite3.Connection`` to a fresh ``:memory:`` database
        containing a ``recipes`` table (id, name, ingredients,
        instructions, prep_time) with two seed rows. ``ingredients`` is
        stored as a JSON-encoded list.
    """
    # check_same_thread=False: the connection is created once at import time
    # but may be queried from a different thread (e.g. a UI worker thread).
    # With sqlite3's default same-thread check that raises ProgrammingError.
    # The data is only read after seeding, so no extra write serialization
    # is needed.
    conn = sqlite3.connect(':memory:', check_same_thread=False)
    c = conn.cursor()
    c.execute(
        "CREATE TABLE recipes "
        "(id INTEGER PRIMARY KEY, name TEXT, ingredients TEXT, "
        "instructions TEXT, prep_time INT)"
    )
    recipes = [
        (
            "Classic Pancakes",
            json.dumps(["flour", "eggs", "milk", "baking powder"]),
            "1. Mix dry ingredients\n2. Add wet ingredients\n3. Cook on griddle",
            15,
        ),
        (
            "Tomato Soup",
            json.dumps(["tomatoes", "onion", "garlic", "vegetable stock"]),
            "1. Sauté onions\n2. Add tomatoes\n3. Simmer and blend",
            30,
        ),
    ]
    c.executemany(
        "INSERT INTO recipes (name, ingredients, instructions, prep_time) VALUES (?,?,?,?)",
        recipes,
    )
    conn.commit()
    return conn
# ------ Voice Processing Functions ------
def text_to_speech(text):
    """Mock TTS: log the text and return silent audio.

    Replace with a real TTS engine for actual use.

    Returns:
        ``(samples, sample_rate)`` where ``samples`` is a float32 array of
        16000 zeros and ``sample_rate`` is 16000.
    """
    sample_rate = 16000
    print(f"[TTS]: {text}")
    # Dummy audio: all-zero samples, one per unit of the sample rate.
    silence = np.zeros(sample_rate, dtype=np.float32)
    return silence, sample_rate
def speech_to_text(audio):
    """Mock STT: ignore ``audio`` and return a fixed transcript.

    Replace with a real speech-recognition call for actual use.
    """
    canned_transcript = "Show me pancake recipes"
    return canned_transcript
# ------ Agent Logic ------
def process_query(query, db_conn):
    """Route a text query to a tool, or fall back to a recipe-name search.

    Intent detection is a case-insensitive keyword check, tried in this
    order: "recipe"/"make" -> recipe lookup, "image"/"show" -> recipe
    image, "convert" -> unit conversion. Tool arguments are hard-coded
    placeholders rather than extracted from the query.

    Returns:
        The tool's result for a matched intent; otherwise the list of
        ``recipes`` rows whose name contains ``query``.
    """
    lowered = query.lower()

    def mentions(*keywords):
        # True when any keyword occurs anywhere in the lowercased query.
        return any(word in lowered for word in keywords)

    if mentions("recipe", "make"):
        # Simplified extraction: fixed ingredient list.
        found_ingredients = ["flour", "eggs"]
        return mcp_server.call_tool(
            "get_recipe_by_ingredients",
            {"ingredients": found_ingredients},
        )

    if mentions("image", "show"):
        # Simplified extraction: fixed recipe name.
        return mcp_server.call_tool(
            "get_recipe_image",
            {"recipe_name": "Classic Pancakes"},
        )

    if mentions("convert"):
        # Simplified extraction: fixed conversion request.
        conversion_args = {"amount": 2, "from_unit": "cups", "to_unit": "ml"}
        return mcp_server.call_tool("convert_measurements", conversion_args)

    # No intent matched: substring search on recipe names.
    cursor = db_conn.cursor()
    cursor.execute("SELECT * FROM recipes WHERE name LIKE ?", (f"%{query}%",))
    return cursor.fetchall()
# ------ Register Tools with MCP Server ------
# Register every tool on the shared server: (name, callable, description).
for _tool_name, _tool_func, _tool_description in (
    (
        "get_recipe_by_ingredients",
        get_recipe_by_ingredients,
        "Find recipes based on available ingredients",
    ),
    (
        "get_recipe_image",
        get_recipe_image,
        "Generate an image of the finished recipe",
    ),
    (
        "convert_measurements",
        convert_measurements,
        "Convert cooking measurements between units",
    ),
):
    mcp_server.register_tool(_tool_name, _tool_func, _tool_description)
# Drop the loop variables so no extra names stay in the module namespace.
del _tool_name, _tool_func, _tool_description
# ------ Initialize System ------
# Shared connection to the seeded in-memory recipe database; created once at
# import time and passed to process_query() by process_voice_command().
db_conn = init_recipe_db()
# ------ Gradio Interface ------
def process_voice_command(audio):
    """Run one voice command through the assistant pipeline.

    Steps: transcribe ``audio``, route the transcript through
    ``process_query``, turn the result into a text reply (plus a
    placeholder image for image results), then synthesize the reply.

    Args:
        audio: Value delivered by the audio input component; forwarded
            unchanged to ``speech_to_text``.

    Returns:
        A 3-tuple ``((sample_rate, audio_data), response_text, image)``
        matching the audio, text and image outputs. ``image`` is ``None``
        unless the result carried an ``image_url``.
    """
    # Convert audio to text
    query = speech_to_text(audio)
    # Process query using agent logic
    result = process_query(query, db_conn)

    # Bound up front so the return value never depends on which branch ran.
    image = None

    if isinstance(result, list):
        # Database fallback: rows are (id, name, ingredients, instructions, prep_time).
        if result:
            lines = [f"Found {len(result)} recipes:"]
            lines.extend(f"- {row[1]} ({row[4]} mins)" for row in result)
            response_text = "\n".join(lines) + "\n"
        else:
            # An empty match list used to be shown verbatim as "[]".
            response_text = "No matching recipes found."
    elif isinstance(result, dict) and "recipes" in result:
        recipes = result["recipes"]
        lines = [f"Found {len(recipes)} recipes:"]
        lines.extend(f"- {recipe['name']} ({recipe['time']} mins)" for recipe in recipes)
        response_text = "\n".join(lines) + "\n"
    elif isinstance(result, dict) and "image_url" in result:
        response_text = f"Here's an image of {result.get('alt_text', 'the recipe')}"
        # Placeholder: a solid-colour image stands in for the real picture.
        image = Image.new('RGB', (300, 200), color=(73, 109, 137))
    else:
        # Conversion results, error dicts and anything else are shown as-is.
        response_text = str(result)

    # Convert response to audio
    audio_data, sr = text_to_speech(response_text)

    return (sr, audio_data), response_text, image
# ------ Hugging Face Space UI ------
# Blocks layout: microphone input and audio reply on the first row, reply text
# and image on the second, the trigger button on the third. The nesting below
# is the conventional arrangement for this sequence of statements.
with gr.Blocks(title="MCP Culinary Voice Assistant") as demo:
    gr.Markdown("# 🧑🍳 MCP-Powered Culinary Voice Assistant (Open-Source)")
    gr.Markdown("Speak to your cooking assistant about recipes, conversions, and more!")
    with gr.Row():
        # NOTE(review): the `source=` keyword is version-dependent in Gradio —
        # confirm it matches the installed release.
        audio_input = gr.Audio(source="microphone", type="numpy", label="Speak to Chef Assistant")
        audio_output = gr.Audio(label="Assistant Response", interactive=False)
    with gr.Row():
        # NOTE(review): labelled "Transcription", but it receives the second
        # value returned by process_voice_command, i.e. the response text.
        text_output = gr.Textbox(label="Transcription", interactive=False)
        image_output = gr.Image(label="Recipe Image", interactive=False)
    with gr.Row():
        submit_btn = gr.Button("Process Command", variant="primary")
    # Output order must match the tuple returned by process_voice_command:
    # (audio, text, image).
    submit_btn.click(
        fn=process_voice_command,
        inputs=[audio_input],
        outputs=[audio_output, text_output, image_output]
    )
    # NOTE(review): the examples target text_output, which is not an input of
    # process_voice_command, and no fn is given — presumably selecting one only
    # fills the textbox; confirm this is intended.
    gr.Examples(
        examples=[
            ["What can I make with eggs and flour?"],
            ["Show me how tomato soup looks"],
            ["Convert 2 cups to milliliters"]
        ],
        inputs=[text_output],
        label="Example Queries"
    )
# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()