# Source: Hugging Face Space file (file size: 10,545 bytes; revision 4955f2d).
import re
import gradio as gr
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# Re-used function to extract video ID
def _extract_video_id(youtube_url: str) -> str | None:
"""
Extracts the YouTube video ID from a URL.
Handles standard, shortened, embed URLs, and direct ID.
"""
# Standard URL: https://www.youtube.com/watch?v=VIDEO_ID
match = re.search(r"watch\?v=([^&]+)", youtube_url)
if match:
return match.group(1)
# Shortened URL: https://youtu.be/VIDEO_ID
match = re.search(r"youtu\.be/([^?&]+)", youtube_url)
if match:
return match.group(1)
# Embed URL: https://www.youtube.com/embed/VIDEO_ID
match = re.search(r"youtube\.com/embed/([^?&]+)", youtube_url)
if match:
return match.group(1)
# Video ID directly passed
if re.fullmatch(r"^[a-zA-Z0-9_-]{11}$", youtube_url):
return youtube_url
return None
def _parse_srt_to_text(srt_content: str) -> str:
"""
Parses SRT formatted string to extract plain text.
Removes timestamps, sequence numbers, and basic HTML formatting.
"""
text_lines = []
lines = srt_content.splitlines()
for line in lines:
if not line.strip() or line.strip().isdigit() or '-->' in line:
continue
line_text = re.sub(r'<[^>]+>', '', line)
text_lines.append(line_text.strip())
return " ".join(text_lines)
def _find_caption_track(items: list, language: str) -> tuple[str, str] | None:
    """
    Picks the caption track that best matches *language*.

    An exact, case-insensitive language-code match wins. Otherwise the first
    track sharing the same base language (the part before '-', so 'en-US'
    satisfies 'en' and vice versa) is accepted.

    Returns:
        ``(caption_id, language_code)`` of the chosen track, or None.
    """
    wanted = language.lower()
    wanted_base = wanted.split('-')[0]
    variant = None
    for item in items:
        lang_code = item['snippet']['language']
        lowered = lang_code.lower()
        if lowered == wanted:
            return item['id'], lang_code
        if variant is None and lowered.split('-')[0] == wanted_base:
            variant = (item['id'], lang_code)
    return variant


def get_youtube_transcript_official_api(video_url_or_id: str, api_key: str, target_language: str = 'en') -> str:
    """
    Retrieves the transcript for a YouTube video using the official YouTube Data API v3.
    This function is intended to be exposed as an MCP tool.

    The caption tracks of the video are listed, a track is chosen (requested
    language first, then a regional variant of it, then English), downloaded
    in SRT format and reduced to plain text. Failures are reported through
    the returned string rather than raised.

    Args:
        video_url_or_id (str): YouTube video URL or 11-character video ID.
        api_key (str): Your YouTube Data API v3 key.
        target_language (str): Preferred language code for the transcript (e.g., 'en', 'es'). Defaults to 'en'.

    Returns:
        str: The concatenated transcript text or an error message.
    """
    video_id = _extract_video_id(video_url_or_id)
    if not video_id:
        return f"Error: Invalid YouTube video URL or ID: '{video_url_or_id}'. Could not extract a valid video ID."
    if not api_key or not api_key.strip():
        return "Error: YouTube Data API Key is missing. Please provide a valid API key for the 'api_key' argument."

    # A None/blank language falls back to English instead of failing on .lower().
    language = (target_language or '').strip() or 'en'

    try:
        youtube = build('youtube', 'v3', developerKey=api_key.strip())
    except Exception as e:
        return f"Error: Could not build YouTube API client. Check library installation. Details: {str(e)}"

    try:
        caption_response = youtube.captions().list(
            part="snippet",
            videoId=video_id
        ).execute()
        items = caption_response.get('items', [])

        track = _find_caption_track(items, language)
        if track is None and language.lower() != 'en':
            track = _find_caption_track(items, 'en')

        if track is None:
            available_langs_details = [
                f"{item['snippet']['language']} "
                f"(Name: '{item['snippet'].get('name', 'N/A')}', "
                f"Type: {item['snippet'].get('trackKind', 'N/A')})"
                for item in items
            ]
            available_langs_str = "\n - ".join(
                available_langs_details) if available_langs_details else "None listed (captions might be disabled, non-existent, or API access restricted)"
            return (f"Error: No suitable caption track found for language '{language}' "
                    f"(or 'en' fallback) for video ID '{video_id}'.\n"
                    f"Available caption tracks:\n - {available_langs_str}")

        caption_id_to_download, found_lang_for_download = track
        srt_transcript = youtube.captions().download(
            id=caption_id_to_download,
            tfmt='srt'
        ).execute()
        # The download endpoint has no JSON response schema, so the client
        # hands back the raw body (bytes); decode it before text parsing.
        if isinstance(srt_transcript, (bytes, bytearray)):
            srt_transcript = bytes(srt_transcript).decode('utf-8-sig', errors='replace')

        plain_text_transcript = _parse_srt_to_text(srt_transcript)
        if not plain_text_transcript.strip():
            return (f"Notice: Transcript for video ID '{video_id}' (Language: {found_lang_for_download}) "
                    "was downloaded but appears empty after parsing. The SRT file might be malformed or contain no text.")
        return plain_text_transcript
    except HttpError as e:
        raw_content = e.content
        error_details = "No additional details in error content."
        if isinstance(raw_content, (bytes, bytearray)):
            if raw_content:
                try:
                    error_details = bytes(raw_content).decode('utf-8')
                except UnicodeDecodeError:
                    error_details = "Error content could not be decoded (non-UTF-8)."
        elif raw_content:
            error_details = str(raw_content)

        status_code = e.resp.status
        # Compare against lowercase needles: the haystack is lowercased, so a
        # mixed-case needle such as "quotaExceeded" could never match.
        lowered_details = error_details.lower()
        if status_code == 401:
            return (f"API Error (401): Unauthorized. The YouTube captions endpoints (notably 'captions.download') "
                    f"require OAuth 2.0 authorization by a user permitted to manage the video; "
                    f"an API key alone is not sufficient. Details: {error_details}")
        if status_code == 403:
            if "quotaexceeded" in lowered_details or "daily limit exceeded" in lowered_details:
                return f"API Error (403): YouTube API quota exceeded. Details: {error_details}"
            return (f"API Error (403): Forbidden. Check API Key ('api_key'), YouTube Data API v3 enablement, or video owner restrictions for video_id='{video_id}'. Details: {error_details}")
        if status_code == 404:
            return (f"API Error (404): Not Found. Video ID '{video_id}' ('video_url_or_id') might be incorrect, private/deleted, or caption track missing. Details: {error_details}")
        return f"API Error ({status_code}): An API error occurred while processing video_id='{video_id}'. Details: {error_details}"
    except Exception as e:
        # Top-level boundary of the tool: report instead of crashing the UI/MCP call.
        return f"Unexpected Error processing video_id='{video_id}': {type(e).__name__} - {str(e)}"
def gradio_interface_handler(video_url_or_id: str, api_key: str, language: str) -> str:
    """
    Fetches the transcript of a YouTube video via the YouTube Data API v3.
    Handler for the Gradio interface; its type hints and this docstring are
    what Gradio uses to generate the MCP tool schema.

    Args:
        video_url_or_id (str): The YouTube video URL or its 11-character ID.
        api_key (str): The YouTube Data API v3 key.
        language (str): The preferred ISO 639-1 language code for the transcript (e.g., 'en', 'es'). Defaults to 'en'.

    Returns:
        str: The fetched transcript or an error message.
    """
    # Normalize once and forward the cleaned values; None (e.g. an argument
    # omitted by a programmatic client) is treated the same as an empty field.
    video_url_or_id = (video_url_or_id or "").strip()
    api_key = (api_key or "").strip()

    if not video_url_or_id:
        return "Error: YouTube Video URL or ID ('video_url_or_id') input is empty. Please provide a valid URL or ID."
    if not api_key:
        return "Error: YouTube API Key ('api_key') input is empty. Please provide your API key."

    language_to_use = (language or "").strip().lower() or 'en'
    return get_youtube_transcript_official_api(video_url_or_id, api_key, language_to_use)
# Define Gradio input components.
# 'label' is shown in the UI and 'placeholder' provides a hint.
# The descriptions for the MCP tool arguments are derived from the docstring
# of 'gradio_interface_handler', not from these components.
inputs = [
    gr.Textbox(
        label="YouTube Video URL or ID",
        placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ or dQw4w9WgXcQ"
    ),
    gr.Textbox(
        label="YouTube Data API Key",
        type="password",  # mask the key while it is being typed
        placeholder="Enter your API key (e.g., AIzaSy...)"
    ),
    gr.Textbox(
        label="Preferred Language Code",
        value="en",  # Default language
        placeholder="e.g., en, es, fr, de"
    )
]

# Define Gradio output component.
# 'label' is for the UI. The description for the MCP tool output is derived
# from the return type hint and docstring of 'gradio_interface_handler'.
outputs = gr.Textbox(
    label="Transcript Output",
    lines=15,
    show_copy_button=True
)

# Create the Gradio interface (launched from the __main__ guard).
demo = gr.Interface(
    fn=gradio_interface_handler,  # The function to wrap, with type hints and docstrings
    inputs=inputs,
    outputs=outputs,
    title="YouTube Video Transcript Retriever (MCP Enabled)",
    description=(  # Main description for the Gradio UI; also gives context for the tool.
        "Enter a YouTube video URL/ID, your YouTube Data API Key, and a preferred language code "
        "to fetch the video transcript. This interface also exposes an MCP tool for programmatic access. "
        "The MCP tool's argument descriptions are generated from the function's docstring."
    ),
    # 'flagging_mode' supersedes the deprecated 'allow_flagging' in the Gradio
    # release line that provides launch(mcp_server=True).
    flagging_mode='never',
    examples=[
        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ", "YOUR_API_KEY_HERE", "en"],
        ["Mdcw3_s2T_s", "YOUR_API_KEY_HERE", "en"],
        ["https://www.youtube.com/watch?v=rokGy0huYEA", "YOUR_API_KEY_HERE", "ja"]
    ],
    # The example rows carry a placeholder API key, so pre-computing their
    # outputs could only cache error messages; keep examples as input presets.
    cache_examples=False,
    article=(
        "**Using the Web Interface:**\n"
        "1. Obtain a [YouTube Data API v3 key](https://developers.google.com/youtube/v3/getting-started).\n"
        "2. Ensure the YouTube Data API v3 is enabled for your project in Google Cloud Console.\n"
        "3. Paste the video URL/ID, your API key, and desired language code into the respective fields.\n"
        "4. Click 'Submit' to retrieve the transcript.\n\n"
        "**MCP Server Information:**\n"
        "When launched with `mcp_server=True`, Gradio also starts an MCP server.\n"
        "- The tool schema (including argument descriptions from the function's docstring) can typically be found at `/gradio_api/mcp/schema`.\n"
        "- The MCP server endpoint is usually at `/gradio_api/mcp/sse`.\n"
        "This allows AI models and other MCP clients to use the transcript retrieval functionality programmatically."
    )
)
if __name__ == '__main__':
    # Startup banner: one message per line, printed before Gradio takes over.
    startup_messages = (
        "Gradio app starting...",
        "MCP Server integration is enabled via mcp_server=True.",
        "Ensure 'gradio[mcp]' is installed if you encounter issues related to MCP.",
    )
    for message in startup_messages:
        print(message)

    # Serve the web UI and, alongside it, the MCP server for the wrapped tool.
    demo.launch(mcp_server=True)
|