Nymbo commited on
Commit
11de92c
·
verified ·
1 Parent(s): b47b1e3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +241 -234
app.py CHANGED
@@ -6,21 +6,35 @@ import base64
6
  from PIL import Image
7
  import io
8
  import requests
9
- from mcp.client.sse import SSEServerParameters
10
- from mcp.jsonrpc.client import JsonRpcClient
11
- from mcp.client.base import ServerCapabilities
 
 
 
 
12
 
13
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
14
- print("Access token loaded.")
 
 
 
 
 
 
 
 
 
 
15
 
16
  # Function to encode image to base64
17
  def encode_image(image_path):
18
  if not image_path:
19
- print("No image path provided")
20
  return None
21
 
22
  try:
23
- print(f"Encoding image from path: {image_path}")
24
 
25
  # If it's already a PIL Image
26
  if isinstance(image_path, Image.Image):
@@ -37,117 +51,134 @@ def encode_image(image_path):
37
  buffered = io.BytesIO()
38
  image.save(buffered, format="JPEG")
39
  img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
40
- print("Image encoded successfully")
41
  return img_str
42
  except Exception as e:
43
- print(f"Error encoding image: {e}")
44
  return None
45
 
46
- # MCP Client class for handling MCP server connections
47
  class MCPClient:
48
- def __init__(self, url):
49
- self.url = url
50
- self.client = None
51
- self.capabilities = None
52
- self.tools = None
53
 
54
- def connect(self):
 
55
  try:
56
- # Connect to the MCP server using SSE
57
- server_params = SSEServerParameters(url=self.url)
58
- self.client = JsonRpcClient(server_params)
59
- self.client.connect()
60
-
61
- # Get server capabilities
62
- self.capabilities = ServerCapabilities(self.client)
63
-
64
- # List available tools
65
- self.tools = self.capabilities.list_tools()
66
- print(f"Connected to MCP Server. Available tools: {[tool.name for tool in self.tools]}")
67
- return True
68
  except Exception as e:
69
- print(f"Error connecting to MCP server: {e}")
70
  return False
71
 
72
- def call_tool(self, tool_name, **kwargs):
73
- if not self.client or not self.tools:
74
- print("MCP client not initialized or no tools available")
75
- return None
76
-
77
- # Find the tool with the given name
78
- tool = next((t for t in self.tools if t.name == tool_name), None)
79
- if not tool:
80
- print(f"Tool '{tool_name}' not found")
81
- return None
82
 
83
  try:
84
- # Call the tool with the given arguments
85
- result = self.client.call_method("tools/call", {"name": tool_name, "arguments": kwargs})
86
- return result
 
 
 
 
 
 
 
 
 
87
  except Exception as e:
88
- print(f"Error calling tool '{tool_name}': {e}")
89
- return None
90
 
91
- def close(self):
92
- if self.client:
93
- try:
94
- self.client.close()
95
- print("MCP client connection closed")
96
- except Exception as e:
97
- print(f"Error closing MCP client connection: {e}")
98
-
99
- # Function to convert text to audio using Kokoro MCP server
100
- def text_to_audio(text, speed=1.0, mcp_url=None):
101
- """Convert text to audio using Kokoro MCP server if available.
102
-
103
- Args:
104
- text (str): Text to convert to speech
105
- speed (float): Speed multiplier for speech
106
- mcp_url (str): URL of the Kokoro MCP server
107
 
108
- Returns:
109
- tuple: (sample_rate, audio_array) or None if conversion fails
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  """
111
- if not text or not mcp_url:
 
 
 
 
112
  return None
113
 
114
- try:
115
- # Connect to MCP server
116
- mcp_client = MCPClient(mcp_url)
117
- if not mcp_client.connect():
118
- return None
119
-
120
- # Call the text_to_audio tool
121
- result = mcp_client.call_tool("text_to_audio", text=text, speed=speed)
122
- mcp_client.close()
123
-
124
- if not result:
125
- return None
126
-
127
- # Process the result - convert base64 audio to numpy array
128
- import numpy as np
129
- import base64
130
-
131
- # Assuming the result contains base64-encoded WAV data
132
- audio_b64 = result
133
- audio_data = base64.b64decode(audio_b64)
134
-
135
- # Convert to numpy array - this is simplified and may need adjustment
136
- # based on the actual output format from the Kokoro MCP server
137
- import io
138
- import soundfile as sf
139
-
140
- audio_io = io.BytesIO(audio_data)
141
- audio_array, sample_rate = sf.read(audio_io)
142
-
143
- return (sample_rate, audio_array)
144
- except Exception as e:
145
- print(f"Error converting text to audio: {e}")
 
146
  return None
147
 
148
  def respond(
149
  message,
150
- image_files,
151
  history: list[tuple[str, str]],
152
  system_message,
153
  max_tokens,
@@ -160,35 +191,33 @@ def respond(
160
  custom_model,
161
  model_search_term,
162
  selected_model,
163
- mcp_server_url=None,
164
  tts_enabled=False,
165
- tts_speed=1.0
166
  ):
167
- print(f"Received message: {message}")
168
- print(f"Received {len(image_files) if image_files else 0} images")
169
- print(f"History: {history}")
170
- print(f"System message: {system_message}")
171
- print(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
172
- print(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
173
- print(f"Selected provider: {provider}")
174
- print(f"Custom API Key provided: {bool(custom_api_key.strip())}")
175
- print(f"Selected model (custom_model): {custom_model}")
176
- print(f"Model search term: {model_search_term}")
177
- print(f"Selected model from radio: {selected_model}")
178
- print(f"MCP Server URL: {mcp_server_url}")
179
- print(f"TTS Enabled: {tts_enabled}")
180
 
181
  # Determine which token to use
182
  token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
183
 
184
  if custom_api_key.strip() != "":
185
- print("USING CUSTOM API KEY: BYOK token provided by user is being used for authentication")
186
  else:
187
- print("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")
188
 
189
  # Initialize the Inference Client with the provider and appropriate token
190
  client = InferenceClient(token=token_to_use, provider=provider)
191
- print(f"Hugging Face Inference Client initialized with {provider} provider.")
192
 
193
  # Convert seed to None if -1 (meaning random)
194
  if seed == -1:
@@ -220,14 +249,14 @@ def respond(
220
  }
221
  })
222
  except Exception as e:
223
- print(f"Error encoding image: {e}")
224
  else:
225
  # Text-only message
226
  user_content = message
227
 
228
  # Prepare messages in the format expected by the API
229
  messages = [{"role": "system", "content": system_message}]
230
- print("Initial messages array constructed.")
231
 
232
  # Add conversation history to the context
233
  for val in history:
@@ -256,29 +285,29 @@ def respond(
256
  }
257
  })
258
  except Exception as e:
259
- print(f"Error encoding history image: {e}")
260
 
261
  messages.append({"role": "user", "content": history_content})
262
  else:
263
  # Regular text message
264
  messages.append({"role": "user", "content": user_part})
265
- print(f"Added user message to context (type: {type(user_part)})")
266
 
267
  if assistant_part:
268
  messages.append({"role": "assistant", "content": assistant_part})
269
- print(f"Added assistant message to context: {assistant_part}")
270
 
271
  # Append the latest user message
272
  messages.append({"role": "user", "content": user_content})
273
- print(f"Latest user message appended (content type: {type(user_content)})")
274
 
275
  # Determine which model to use, prioritizing custom_model if provided
276
  model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
277
- print(f"Model selected for inference: {model_to_use}")
278
 
279
  # Start with an empty string to build the response as tokens stream in
280
  response = ""
281
- print(f"Sending request to {provider} provider.")
282
 
283
  # Prepare parameters for the chat completion request
284
  parameters = {
@@ -301,7 +330,7 @@ def respond(
301
  **parameters
302
  )
303
 
304
- print("Received tokens: ", end="", flush=True)
305
 
306
  # Process the streaming response
307
  for chunk in stream:
@@ -314,26 +343,28 @@ def respond(
314
  response += token_text
315
  yield response
316
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  print()
318
  except Exception as e:
319
- print(f"Error during inference: {e}")
320
  response += f"\nError: {str(e)}"
321
  yield response
322
 
323
- print("Completed response generation.")
324
-
325
- # If TTS is enabled and we have a valid MCP server URL, convert response to audio
326
- if tts_enabled and mcp_server_url and response:
327
- try:
328
- print(f"Converting response to audio using MCP server: {mcp_server_url}")
329
- audio_data = text_to_audio(response, tts_speed, mcp_server_url)
330
- if audio_data:
331
- # Here we would need to handle returning both text and audio
332
- # This would require modifying the Gradio interface to support this
333
- print("Successfully converted text to audio")
334
- # For now, we'll just return the text response
335
- except Exception as e:
336
- print(f"Error converting text to audio: {e}")
337
 
338
  # Function to validate provider selection based on BYOK
339
  def validate_provider(api_key, provider):
@@ -341,21 +372,10 @@ def validate_provider(api_key, provider):
341
  return gr.update(value="hf-inference")
342
  return gr.update(value=provider)
343
 
344
- # Function to test MCP server connection
345
- def test_mcp_connection(mcp_url):
346
- if not mcp_url or not mcp_url.strip():
347
- return "Please enter an MCP server URL"
348
-
349
- try:
350
- mcp_client = MCPClient(mcp_url)
351
- if mcp_client.connect():
352
- tools = [tool.name for tool in mcp_client.tools]
353
- mcp_client.close()
354
- return f"Successfully connected to MCP server. Available tools: {', '.join(tools)}"
355
- else:
356
- return "Failed to connect to MCP server"
357
- except Exception as e:
358
- return f"Error connecting to MCP server: {str(e)}"
359
 
360
  # GRADIO UI
361
  with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
@@ -363,10 +383,10 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
363
  chatbot = gr.Chatbot(
364
  height=600,
365
  show_copy_button=True,
366
- placeholder="Select a model and begin chatting. Now supports multiple inference providers and multimodal inputs",
367
  layout="panel"
368
  )
369
- print("Chatbot interface created.")
370
 
371
  # Multimodal textbox for messages (combines text and file uploads)
372
  msg = gr.MultimodalTextbox(
@@ -511,94 +531,76 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
511
  )
512
 
513
  gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")
514
-
515
- # New Accordion for MCP Settings
516
- with gr.Accordion("MCP Server Settings", open=False):
517
- mcp_server_url = gr.Textbox(
518
- value="",
519
- label="MCP Server URL",
520
- info="Enter the URL of an MCP server to connect to (e.g., https://example-kokoro-mcp.hf.space/gradio_api/mcp/sse)",
521
- placeholder="https://fdaudens-kokoro-mcp.hf.space/gradio_api/mcp/sse"
522
- )
523
 
524
- test_connection_btn = gr.Button("Test Connection")
525
- connection_status = gr.Textbox(
526
- label="Connection Status",
527
- interactive=False
528
- )
529
-
530
- tts_enabled = gr.Checkbox(
531
- label="Enable Text-to-Speech",
532
- value=False,
533
- info="Convert AI responses to speech using the Kokoro TTS service"
534
- )
535
-
536
- tts_speed = gr.Slider(
537
- minimum=0.5,
538
- maximum=2.0,
539
- value=1.0,
540
- step=0.1,
541
- label="Speech Speed"
542
- )
543
-
544
- gr.Markdown("""
545
- ### About MCP Support
546
-
547
- This app can connect to Model Context Protocol (MCP) servers to extend its capabilities.
548
-
549
- For example, connecting to a Kokoro MCP server allows for text-to-speech conversion.
550
-
551
- To use this feature:
552
- 1. Enter the MCP server URL
553
- 2. Test the connection
554
- 3. Enable the desired features (e.g., TTS)
555
- 4. Chat normally with the AI
556
-
557
- Note: TTS functionality requires an active connection to a Kokoro MCP server.
558
- """)
559
 
560
  # Chat history state
561
  chat_history = gr.State([])
562
 
563
- # Connect the test connection button
564
- test_connection_btn.click(
565
- fn=test_mcp_connection,
566
- inputs=[mcp_server_url],
567
- outputs=[connection_status]
568
- )
569
-
570
  # Function to filter models
571
  def filter_models(search_term):
572
- print(f"Filtering models with search term: {search_term}")
573
  filtered = [m for m in models_list if search_term.lower() in m.lower()]
574
- print(f"Filtered models: {filtered}")
575
  return gr.update(choices=filtered)
576
 
577
  # Function to set custom model from radio
578
  def set_custom_model_from_radio(selected):
579
- print(f"Featured model selected: {selected}")
580
  return selected
581
 
582
  # Function for the chat interface
583
  def user(user_message, history):
584
  # Debug logging for troubleshooting
585
- print(f"User message received: {user_message}")
586
 
587
  # Skip if message is empty (no text and no files)
588
  if not user_message or (not user_message.get("text") and not user_message.get("files")):
589
- print("Empty message, skipping")
590
  return history
591
 
592
  # Prepare multimodal message format
593
  text_content = user_message.get("text", "").strip()
594
  files = user_message.get("files", [])
595
 
596
- print(f"Text content: {text_content}")
597
- print(f"Files: {files}")
598
 
599
  # If both text and files are empty, skip
600
  if not text_content and not files:
601
- print("No content to display")
602
  return history
603
 
604
  # Add message with images to history
@@ -606,33 +608,33 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
606
  # Add text message first if it exists
607
  if text_content:
608
  # Add a separate text message
609
- print(f"Adding text message: {text_content}")
610
  history.append([text_content, None])
611
 
612
  # Then add each image file separately
613
  for file_path in files:
614
  if file_path and isinstance(file_path, str):
615
- print(f"Adding image: {file_path}")
616
  # Add image as a separate message with no text
617
  history.append([f"![Image]({file_path})", None])
618
 
619
  return history
620
  else:
621
  # For text-only messages
622
- print(f"Adding text-only message: {text_content}")
623
  history.append([text_content, None])
624
  return history
625
 
626
  # Define bot response function
627
- def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model, mcp_url, tts_on, tts_spd):
628
  # Check if history is valid
629
  if not history or len(history) == 0:
630
- print("No history to process")
631
  return history
632
 
633
  # Get the most recent message and detect if it's an image
634
  user_message = history[-1][0]
635
- print(f"Processing user message: {user_message}")
636
 
637
  is_image = False
638
  image_path = None
@@ -643,7 +645,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
643
  is_image = True
644
  # Extract image path from markdown format ![Image](path)
645
  image_path = user_message.replace("![Image](", "").replace(")", "")
646
- print(f"Image detected: {image_path}")
647
  text_content = "" # No text for image-only messages
648
 
649
  # Look back for text context if this is an image
@@ -653,7 +655,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
653
  prev_message = history[-2][0]
654
  if isinstance(prev_message, str) and not prev_message.startswith("![Image]("):
655
  text_context = prev_message
656
- print(f"Using text context from previous message: {text_context}")
657
 
658
  # Process message through respond function
659
  history[-1][1] = ""
@@ -676,9 +678,8 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
676
  custom_model,
677
  search_term,
678
  selected_model,
679
- mcp_url,
680
- tts_on,
681
- tts_spd
682
  ):
683
  history[-1][1] = response
684
  yield history
@@ -699,9 +700,8 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
699
  custom_model,
700
  search_term,
701
  selected_model,
702
- mcp_url,
703
- tts_on,
704
- tts_spd
705
  ):
706
  history[-1][1] = response
707
  yield history
@@ -716,7 +716,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
716
  bot,
717
  [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
718
  frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
719
- model_search_box, featured_model_radio, mcp_server_url, tts_enabled, tts_speed],
720
  [chatbot]
721
  ).then(
722
  lambda: {"text": "", "files": []}, # Clear inputs after submission
@@ -730,7 +730,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
730
  inputs=model_search_box,
731
  outputs=featured_model_radio
732
  )
733
- print("Model search box change event linked.")
734
 
735
  # Connect the featured model radio to update the custom model box
736
  featured_model_radio.change(
@@ -738,7 +738,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
738
  inputs=featured_model_radio,
739
  outputs=custom_model_box
740
  )
741
- print("Featured model radio button change event linked.")
742
 
743
  # Connect the BYOK textbox to validate provider selection
744
  byok_textbox.change(
@@ -746,7 +746,7 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
746
  inputs=[byok_textbox, provider_radio],
747
  outputs=provider_radio
748
  )
749
- print("BYOK textbox change event linked.")
750
 
751
  # Also validate provider when the radio changes to ensure consistency
752
  provider_radio.change(
@@ -754,10 +754,17 @@ with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
754
  inputs=[byok_textbox, provider_radio],
755
  outputs=provider_radio
756
  )
757
- print("Provider radio button change event linked.")
 
 
 
 
 
 
 
758
 
759
- print("Gradio interface initialized.")
760
 
761
  if __name__ == "__main__":
762
- print("Launching the demo application.")
763
  demo.launch(show_api=True)
 
6
  from PIL import Image
7
  import io
8
  import requests
9
+ from typing import Dict, List, Optional, Any, Union
10
+ import time
11
+ import logging
12
+
13
+ # Setup logging
14
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
15
+ logger = logging.getLogger(__name__)
16
 
17
  ACCESS_TOKEN = os.getenv("HF_TOKEN")
18
+ logger.info("Access token loaded.")
19
+
20
+ # MCP Client Configuration
21
+ MCP_SERVERS = {}
22
+ try:
23
+ mcp_config = os.getenv("MCP_CONFIG")
24
+ if mcp_config:
25
+ MCP_SERVERS = json.loads(mcp_config)
26
+ logger.info(f"Loaded MCP configuration: {len(MCP_SERVERS)} servers defined")
27
+ except Exception as e:
28
+ logger.error(f"Error loading MCP configuration: {e}")
29
 
30
  # Function to encode image to base64
31
  def encode_image(image_path):
32
  if not image_path:
33
+ logger.warning("No image path provided")
34
  return None
35
 
36
  try:
37
+ logger.info(f"Encoding image from path: {image_path}")
38
 
39
  # If it's already a PIL Image
40
  if isinstance(image_path, Image.Image):
 
51
  buffered = io.BytesIO()
52
  image.save(buffered, format="JPEG")
53
  img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
54
+ logger.info("Image encoded successfully")
55
  return img_str
56
  except Exception as e:
57
+ logger.error(f"Error encoding image: {e}")
58
  return None
59
 
60
+ # MCP Client implementation
61
  class MCPClient:
62
+ def __init__(self, server_url: str):
63
+ self.server_url = server_url
64
+ self.session_id = None
65
+ logger.info(f"Initialized MCP Client for server: {server_url}")
 
66
 
67
+ def connect(self) -> bool:
68
+ """Establish connection with the MCP server"""
69
  try:
70
+ response = requests.post(
71
+ f"{self.server_url}/connect",
72
+ json={"client": "Serverless-TextGen-Hub", "version": "1.0.0"}
73
+ )
74
+ if response.status_code == 200:
75
+ result = response.json()
76
+ self.session_id = result.get("session_id")
77
+ logger.info(f"Connected to MCP server with session ID: {self.session_id}")
78
+ return True
79
+ else:
80
+ logger.error(f"Failed to connect to MCP server: {response.status_code} - {response.text}")
81
+ return False
82
  except Exception as e:
83
+ logger.error(f"Error connecting to MCP server: {e}")
84
  return False
85
 
86
+ def list_tools(self) -> List[Dict]:
87
+ """List available tools from the MCP server"""
88
+ if not self.session_id:
89
+ if not self.connect():
90
+ return []
 
 
 
 
 
91
 
92
  try:
93
+ response = requests.get(
94
+ f"{self.server_url}/tools/list",
95
+ headers={"X-MCP-Session": self.session_id}
96
+ )
97
+ if response.status_code == 200:
98
+ result = response.json()
99
+ tools = result.get("tools", [])
100
+ logger.info(f"Retrieved {len(tools)} tools from MCP server")
101
+ return tools
102
+ else:
103
+ logger.error(f"Failed to list tools: {response.status_code} - {response.text}")
104
+ return []
105
  except Exception as e:
106
+ logger.error(f"Error listing tools: {e}")
107
+ return []
108
 
109
+ def call_tool(self, tool_name: str, args: Dict) -> Dict:
110
+ """Call a tool on the MCP server"""
111
+ if not self.session_id:
112
+ if not self.connect():
113
+ return {"error": "Not connected to MCP server"}
 
 
 
 
 
 
 
 
 
 
 
114
 
115
+ try:
116
+ response = requests.post(
117
+ f"{self.server_url}/tools/call",
118
+ headers={"X-MCP-Session": self.session_id},
119
+ json={"name": tool_name, "arguments": args}
120
+ )
121
+ if response.status_code == 200:
122
+ result = response.json()
123
+ logger.info(f"Successfully called tool {tool_name}")
124
+ return result
125
+ else:
126
+ error_msg = f"Failed to call tool {tool_name}: {response.status_code} - {response.text}"
127
+ logger.error(error_msg)
128
+ return {"error": error_msg}
129
+ except Exception as e:
130
+ error_msg = f"Error calling tool {tool_name}: {e}"
131
+ logger.error(error_msg)
132
+ return {"error": error_msg}
133
+
134
+ # Text-to-speech client function
135
+ def text_to_speech(text: str, server_name: str = None) -> Optional[str]:
136
  """
137
+ Convert text to speech using an MCP TTS server
138
+ Returns an audio URL that can be embedded in the chat
139
+ """
140
+ if not server_name or server_name not in MCP_SERVERS:
141
+ logger.warning(f"TTS server {server_name} not configured")
142
  return None
143
 
144
+ server_url = MCP_SERVERS[server_name].get("url")
145
+ if not server_url:
146
+ logger.warning(f"No URL found for TTS server {server_name}")
147
+ return None
148
+
149
+ client = MCPClient(server_url)
150
+
151
+ # List available tools to find the TTS tool
152
+ tools = client.list_tools()
153
+ tts_tool = next((t for t in tools if "text_to_audio" in t["name"] or "tts" in t["name"]), None)
154
+
155
+ if not tts_tool:
156
+ logger.warning(f"No TTS tool found on server {server_name}")
157
+ return None
158
+
159
+ # Call the TTS tool
160
+ result = client.call_tool(tts_tool["name"], {"text": text, "speed": 1.0})
161
+
162
+ if "error" in result:
163
+ logger.error(f"TTS error: {result['error']}")
164
+ return None
165
+
166
+ # Process the result - usually a base64 encoded WAV
167
+ audio_data = result.get("audio") or result.get("content") or result.get("result")
168
+
169
+ if isinstance(audio_data, str) and audio_data.startswith("data:audio"):
170
+ # Already a data URL
171
+ return audio_data
172
+ elif isinstance(audio_data, str):
173
+ # Assume it's base64 encoded
174
+ return f"data:audio/wav;base64,{audio_data}"
175
+ else:
176
+ logger.error(f"Unexpected TTS result format: {type(audio_data)}")
177
  return None
178
 
179
  def respond(
180
  message,
181
+ image_files, # Changed parameter name and structure
182
  history: list[tuple[str, str]],
183
  system_message,
184
  max_tokens,
 
191
  custom_model,
192
  model_search_term,
193
  selected_model,
 
194
  tts_enabled=False,
195
+ tts_server=None
196
  ):
197
+ logger.info(f"Received message: {message}")
198
+ logger.info(f"Received {len(image_files) if image_files else 0} images")
199
+ logger.info(f"History: {history}")
200
+ logger.info(f"System message: {system_message}")
201
+ logger.info(f"Max tokens: {max_tokens}, Temperature: {temperature}, Top-P: {top_p}")
202
+ logger.info(f"Frequency Penalty: {frequency_penalty}, Seed: {seed}")
203
+ logger.info(f"Selected provider: {provider}")
204
+ logger.info(f"Custom API Key provided: {bool(custom_api_key.strip())}")
205
+ logger.info(f"Selected model (custom_model): {custom_model}")
206
+ logger.info(f"Model search term: {model_search_term}")
207
+ logger.info(f"Selected model from radio: {selected_model}")
208
+ logger.info(f"TTS enabled: {tts_enabled}, TTS server: {tts_server}")
 
209
 
210
  # Determine which token to use
211
  token_to_use = custom_api_key if custom_api_key.strip() != "" else ACCESS_TOKEN
212
 
213
  if custom_api_key.strip() != "":
214
+ logger.info("USING CUSTOM API KEY: BYOK token provided by user is being used for authentication")
215
  else:
216
+ logger.info("USING DEFAULT API KEY: Environment variable HF_TOKEN is being used for authentication")
217
 
218
  # Initialize the Inference Client with the provider and appropriate token
219
  client = InferenceClient(token=token_to_use, provider=provider)
220
+ logger.info(f"Hugging Face Inference Client initialized with {provider} provider.")
221
 
222
  # Convert seed to None if -1 (meaning random)
223
  if seed == -1:
 
249
  }
250
  })
251
  except Exception as e:
252
+ logger.error(f"Error encoding image: {e}")
253
  else:
254
  # Text-only message
255
  user_content = message
256
 
257
  # Prepare messages in the format expected by the API
258
  messages = [{"role": "system", "content": system_message}]
259
+ logger.info("Initial messages array constructed.")
260
 
261
  # Add conversation history to the context
262
  for val in history:
 
285
  }
286
  })
287
  except Exception as e:
288
+ logger.error(f"Error encoding history image: {e}")
289
 
290
  messages.append({"role": "user", "content": history_content})
291
  else:
292
  # Regular text message
293
  messages.append({"role": "user", "content": user_part})
294
+ logger.info(f"Added user message to context (type: {type(user_part)})")
295
 
296
  if assistant_part:
297
  messages.append({"role": "assistant", "content": assistant_part})
298
+ logger.info(f"Added assistant message to context: {assistant_part}")
299
 
300
  # Append the latest user message
301
  messages.append({"role": "user", "content": user_content})
302
+ logger.info(f"Latest user message appended (content type: {type(user_content)})")
303
 
304
  # Determine which model to use, prioritizing custom_model if provided
305
  model_to_use = custom_model.strip() if custom_model.strip() != "" else selected_model
306
+ logger.info(f"Model selected for inference: {model_to_use}")
307
 
308
  # Start with an empty string to build the response as tokens stream in
309
  response = ""
310
+ logger.info(f"Sending request to {provider} provider.")
311
 
312
  # Prepare parameters for the chat completion request
313
  parameters = {
 
330
  **parameters
331
  )
332
 
333
+ logger.info("Received tokens: ")
334
 
335
  # Process the streaming response
336
  for chunk in stream:
 
343
  response += token_text
344
  yield response
345
 
346
+ # If TTS is enabled and we have a response, convert it to speech
347
+ if tts_enabled and tts_server and response:
348
+ logger.info(f"Converting response to speech using TTS server: {tts_server}")
349
+ try:
350
+ audio_url = text_to_speech(response, tts_server)
351
+ if audio_url:
352
+ # Add audio tag to the end of the response
353
+ response += f"\n\n<audio src='{audio_url}' controls></audio>"
354
+ yield response
355
+ else:
356
+ logger.warning("TTS conversion failed, continuing without audio")
357
+ except Exception as e:
358
+ logger.error(f"Error in TTS conversion: {e}")
359
+ # Continue without TTS if there's an error
360
+
361
  print()
362
  except Exception as e:
363
+ logger.error(f"Error during inference: {e}")
364
  response += f"\nError: {str(e)}"
365
  yield response
366
 
367
+ logger.info("Completed response generation.")
 
 
 
 
 
 
 
 
 
 
 
 
 
368
 
369
  # Function to validate provider selection based on BYOK
370
  def validate_provider(api_key, provider):
 
372
  return gr.update(value="hf-inference")
373
  return gr.update(value=provider)
374
 
375
+ # Function to list available MCP servers
376
+ def list_mcp_servers():
377
+ """List all configured MCP servers"""
378
+ return list(MCP_SERVERS.keys())
 
 
 
 
 
 
 
 
 
 
 
379
 
380
  # GRADIO UI
381
  with gr.Blocks(theme="Nymbo/Nymbo_Theme") as demo:
 
383
  chatbot = gr.Chatbot(
384
  height=600,
385
  show_copy_button=True,
386
+ placeholder="Select a model and begin chatting. Now supports multiple inference providers, multimodal inputs, and MCP servers",
387
  layout="panel"
388
  )
389
+ logger.info("Chatbot interface created.")
390
 
391
  # Multimodal textbox for messages (combines text and file uploads)
392
  msg = gr.MultimodalTextbox(
 
531
  )
532
 
533
  gr.Markdown("[View all Text-to-Text models](https://huggingface.co/models?inference_provider=all&pipeline_tag=text-generation&sort=trending) | [View all multimodal models](https://huggingface.co/models?inference_provider=all&pipeline_tag=image-text-to-text&sort=trending)")
 
 
 
 
 
 
 
 
 
534
 
535
+ # MCP TTS integration
536
+ with gr.Accordion("MCP Integration", open=False):
537
+ gr.Markdown("## Model Context Protocol (MCP) Integration")
538
+ gr.Markdown("Connect to MCP servers to extend functionality.")
539
+
540
+ tts_enabled = gr.Checkbox(
541
+ label="Enable Text-to-Speech",
542
+ value=False,
543
+ info="When enabled, responses will be converted to speech using the selected MCP TTS server"
544
+ )
545
+
546
+ # Create dropdown for available MCP servers
547
+ available_servers = list_mcp_servers()
548
+ tts_server = gr.Dropdown(
549
+ label="TTS Server",
550
+ choices=available_servers,
551
+ value=available_servers[0] if available_servers else None,
552
+ interactive=True,
553
+ visible=len(available_servers) > 0
554
+ )
555
+
556
+ # If no servers configured, show a message
557
+ if not available_servers:
558
+ gr.Markdown("""
559
+ No MCP servers configured. Add them using the MCP_CONFIG environment variable:
560
+ ```json
561
+ {
562
+ "kokoroTTS": {
563
+ "url": "https://your-kokoro-tts-server/gradio_api/mcp/sse"
564
+ }
565
+ }
566
+ ```
567
+ """)
 
 
568
 
569
  # Chat history state
570
  chat_history = gr.State([])
571
 
 
 
 
 
 
 
 
572
  # Function to filter models
573
  def filter_models(search_term):
574
+ logger.info(f"Filtering models with search term: {search_term}")
575
  filtered = [m for m in models_list if search_term.lower() in m.lower()]
576
+ logger.info(f"Filtered models: {filtered}")
577
  return gr.update(choices=filtered)
578
 
579
  # Function to set custom model from radio
580
  def set_custom_model_from_radio(selected):
581
+ logger.info(f"Featured model selected: {selected}")
582
  return selected
583
 
584
  # Function for the chat interface
585
  def user(user_message, history):
586
  # Debug logging for troubleshooting
587
+ logger.info(f"User message received: {user_message}")
588
 
589
  # Skip if message is empty (no text and no files)
590
  if not user_message or (not user_message.get("text") and not user_message.get("files")):
591
+ logger.info("Empty message, skipping")
592
  return history
593
 
594
  # Prepare multimodal message format
595
  text_content = user_message.get("text", "").strip()
596
  files = user_message.get("files", [])
597
 
598
+ logger.info(f"Text content: {text_content}")
599
+ logger.info(f"Files: {files}")
600
 
601
  # If both text and files are empty, skip
602
  if not text_content and not files:
603
+ logger.info("No content to display")
604
  return history
605
 
606
  # Add message with images to history
 
608
  # Add text message first if it exists
609
  if text_content:
610
  # Add a separate text message
611
+ logger.info(f"Adding text message: {text_content}")
612
  history.append([text_content, None])
613
 
614
  # Then add each image file separately
615
  for file_path in files:
616
  if file_path and isinstance(file_path, str):
617
+ logger.info(f"Adding image: {file_path}")
618
  # Add image as a separate message with no text
619
  history.append([f"![Image]({file_path})", None])
620
 
621
  return history
622
  else:
623
  # For text-only messages
624
+ logger.info(f"Adding text-only message: {text_content}")
625
  history.append([text_content, None])
626
  return history
627
 
628
  # Define bot response function
629
+ def bot(history, system_msg, max_tokens, temperature, top_p, freq_penalty, seed, provider, api_key, custom_model, search_term, selected_model, tts_enabled, tts_server):
630
  # Check if history is valid
631
  if not history or len(history) == 0:
632
+ logger.info("No history to process")
633
  return history
634
 
635
  # Get the most recent message and detect if it's an image
636
  user_message = history[-1][0]
637
+ logger.info(f"Processing user message: {user_message}")
638
 
639
  is_image = False
640
  image_path = None
 
645
  is_image = True
646
  # Extract image path from markdown format ![Image](path)
647
  image_path = user_message.replace("![Image](", "").replace(")", "")
648
+ logger.info(f"Image detected: {image_path}")
649
  text_content = "" # No text for image-only messages
650
 
651
  # Look back for text context if this is an image
 
655
  prev_message = history[-2][0]
656
  if isinstance(prev_message, str) and not prev_message.startswith("![Image]("):
657
  text_context = prev_message
658
+ logger.info(f"Using text context from previous message: {text_context}")
659
 
660
  # Process message through respond function
661
  history[-1][1] = ""
 
678
  custom_model,
679
  search_term,
680
  selected_model,
681
+ tts_enabled,
682
+ tts_server
 
683
  ):
684
  history[-1][1] = response
685
  yield history
 
700
  custom_model,
701
  search_term,
702
  selected_model,
703
+ tts_enabled,
704
+ tts_server
 
705
  ):
706
  history[-1][1] = response
707
  yield history
 
716
  bot,
717
  [chatbot, system_message_box, max_tokens_slider, temperature_slider, top_p_slider,
718
  frequency_penalty_slider, seed_slider, provider_radio, byok_textbox, custom_model_box,
719
+ model_search_box, featured_model_radio, tts_enabled, tts_server],
720
  [chatbot]
721
  ).then(
722
  lambda: {"text": "", "files": []}, # Clear inputs after submission
 
730
  inputs=model_search_box,
731
  outputs=featured_model_radio
732
  )
733
+ logger.info("Model search box change event linked.")
734
 
735
  # Connect the featured model radio to update the custom model box
736
  featured_model_radio.change(
 
738
  inputs=featured_model_radio,
739
  outputs=custom_model_box
740
  )
741
+ logger.info("Featured model radio button change event linked.")
742
 
743
  # Connect the BYOK textbox to validate provider selection
744
  byok_textbox.change(
 
746
  inputs=[byok_textbox, provider_radio],
747
  outputs=provider_radio
748
  )
749
+ logger.info("BYOK textbox change event linked.")
750
 
751
  # Also validate provider when the radio changes to ensure consistency
752
  provider_radio.change(
 
754
  inputs=[byok_textbox, provider_radio],
755
  outputs=provider_radio
756
  )
757
+ logger.info("Provider radio button change event linked.")
758
+
759
+ # Update TTS server dropdown visibility based on the TTS toggle
760
+ tts_enabled.change(
761
+ lambda enabled: gr.update(visible=enabled and len(list_mcp_servers()) > 0),
762
+ inputs=tts_enabled,
763
+ outputs=tts_server
764
+ )
765
 
766
+ logger.info("Gradio interface initialized.")
767
 
768
  if __name__ == "__main__":
769
+ logger.info("Launching the demo application.")
770
  demo.launch(show_api=True)