mcp-youtube-transcript / app via API.py
kirbah's picture
Better descriptions
4955f2d
import re
import gradio as gr
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# Re-used function to extract video ID
def _extract_video_id(youtube_url: str) -> str | None:
"""
Extracts the YouTube video ID from a URL.
Handles standard, shortened, embed URLs, and direct ID.
"""
# Standard URL: https://www.youtube.com/watch?v=VIDEO_ID
match = re.search(r"watch\?v=([^&]+)", youtube_url)
if match:
return match.group(1)
# Shortened URL: https://youtu.be/VIDEO_ID
match = re.search(r"youtu\.be/([^?&]+)", youtube_url)
if match:
return match.group(1)
# Embed URL: https://www.youtube.com/embed/VIDEO_ID
match = re.search(r"youtube\.com/embed/([^?&]+)", youtube_url)
if match:
return match.group(1)
# Video ID directly passed
if re.fullmatch(r"^[a-zA-Z0-9_-]{11}$", youtube_url):
return youtube_url
return None
def _parse_srt_to_text(srt_content: str) -> str:
"""
Parses SRT formatted string to extract plain text.
Removes timestamps, sequence numbers, and basic HTML formatting.
"""
text_lines = []
lines = srt_content.splitlines()
for line in lines:
if not line.strip() or line.strip().isdigit() or '-->' in line:
continue
line_text = re.sub(r'<[^>]+>', '', line)
text_lines.append(line_text.strip())
return " ".join(text_lines)
def _select_caption_track(caption_items: list, target_language: str) -> tuple[str | None, str | None]:
    """Pick the best caption track: requested language first, then English.

    For each preferred language an exact (case-insensitive) code match wins;
    otherwise a regional variant of the same language is accepted (e.g. an
    'en-US' track for 'en', or an 'en' track for 'en-GB').

    Returns:
        (caption_id, language_code) of the chosen track, or (None, None).
    """
    wanted = target_language.lower()
    # dict.fromkeys de-duplicates while keeping priority order.
    for preferred in dict.fromkeys((wanted, 'en')):
        regional_match = None
        for item in caption_items:
            lang_code = item['snippet']['language']
            lowered = lang_code.lower()
            if lowered == preferred:
                return item['id'], lang_code
            is_variant = lowered.startswith(preferred + '-') or preferred.startswith(lowered + '-')
            if regional_match is None and is_variant:
                regional_match = (item['id'], lang_code)
        if regional_match is not None:
            return regional_match
    return None, None


def _describe_caption_tracks(caption_items: list) -> list[str]:
    """Build one human-readable line per caption track for error messages."""
    descriptions = []
    for item in caption_items:
        snippet = item['snippet']
        lang_code = snippet['language']
        lang_name = snippet.get('name', 'N/A')
        track_kind = snippet.get('trackKind', 'N/A')
        descriptions.append(
            f"{lang_code} (Name: '{lang_name}', Type: {track_kind})")
    return descriptions


def _format_http_error(error: Exception, video_id: str) -> str:
    """Translate a googleapiclient HttpError into a user-facing message."""
    raw_content = error.content
    error_details = "No additional details in error content."
    if raw_content:
        if isinstance(raw_content, bytes):
            try:
                error_details = raw_content.decode('utf-8')
            except UnicodeDecodeError:
                error_details = "Error content could not be decoded (non-UTF-8)."
        else:
            error_details = str(raw_content)

    status_code = error.resp.status
    if status_code == 401:
        return (f"API Error (401): Unauthorized. The YouTube captions endpoints typically require OAuth 2.0 "
                f"authorization; an API key alone ('api_key') may not be sufficient for video_id='{video_id}'. "
                f"Details: {error_details}")
    if status_code == 403:
        # Compare lower-case against lower-case: the API reports reasons such
        # as "quotaExceeded" / "dailyLimitExceeded" in mixed case.
        lowered_details = error_details.lower()
        quota_markers = ("quotaexceeded", "dailylimitexceeded", "daily limit exceeded")
        if any(marker in lowered_details for marker in quota_markers):
            return f"API Error (403): YouTube API quota exceeded. Details: {error_details}"
        return (f"API Error (403): Forbidden. Check API Key ('api_key'), YouTube Data API v3 enablement, or video owner restrictions for video_id='{video_id}'. Details: {error_details}")
    if status_code == 404:
        return (f"API Error (404): Not Found. Video ID '{video_id}' ('video_url_or_id') might be incorrect, private/deleted, or caption track missing. Details: {error_details}")
    return f"API Error ({status_code}): An API error occurred while processing video_id='{video_id}'. Details: {error_details}"


def get_youtube_transcript_official_api(video_url_or_id: str, api_key: str, target_language: str = 'en') -> str:
    """
    Retrieves the transcript for a YouTube video using the official YouTube Data API v3.
    This function is intended to be exposed as an MCP tool.

    The caption tracks of the video are listed, the track for the requested
    language is selected (falling back to a regional variant, then English),
    downloaded in SRT format and reduced to plain text. Failures are never
    raised; they are reported in the returned string.

    Args:
        video_url_or_id (str): YouTube video URL or 11-character video ID.
        api_key (str): Your YouTube Data API v3 key.
        target_language (str): Preferred language code for the transcript (e.g., 'en', 'es'). Defaults to 'en'.

    Returns:
        str: The concatenated transcript text or an error message.
    """
    video_id = _extract_video_id(video_url_or_id)
    if not video_id:
        return f"Error: Invalid YouTube video URL or ID: '{video_url_or_id}'. Could not extract a valid video ID."
    if not api_key or not api_key.strip():
        return "Error: YouTube Data API Key is missing. Please provide a valid API key for the 'api_key' argument."

    # Tolerate None / blank / padded language values instead of failing later.
    target_language = (target_language or '').strip() or 'en'

    try:
        youtube = build('youtube', 'v3', developerKey=api_key.strip())
    except Exception as e:
        return f"Error: Could not build YouTube API client. Check library installation. Details: {str(e)}"

    try:
        caption_response = youtube.captions().list(
            part="snippet",
            videoId=video_id
        ).execute()
        caption_items = caption_response.get('items', [])

        caption_id_to_download, found_lang_for_download = _select_caption_track(
            caption_items, target_language)
        if not caption_id_to_download:
            available_langs_details = _describe_caption_tracks(caption_items)
            available_langs_str = "\n - ".join(
                available_langs_details) if available_langs_details else "None listed (captions might be disabled, non-existent, or API access restricted)"
            return (f"Error: No suitable caption track found for language '{target_language}' "
                    f"(or 'en' fallback) for video ID '{video_id}'.\n"
                    f"Available caption tracks:\n - {available_langs_str}")

        srt_transcript = youtube.captions().download(
            id=caption_id_to_download,
            tfmt='srt'
        ).execute()
        # The download body may arrive as raw bytes; the SRT parser works on text.
        if isinstance(srt_transcript, bytes):
            srt_transcript = srt_transcript.decode('utf-8', errors='replace')

        plain_text_transcript = _parse_srt_to_text(srt_transcript)
        if not plain_text_transcript.strip():
            return (f"Notice: Transcript for video ID '{video_id}' (Language: {found_lang_for_download}) "
                    "was downloaded but appears empty after parsing. The SRT file might be malformed or contain no text.")
        return plain_text_transcript
    except HttpError as e:
        return _format_http_error(e, video_id)
    except Exception as e:
        # Tool boundary: report anything unexpected as text rather than raising.
        return f"Unexpected Error processing video_id='{video_id}': {type(e).__name__} - {str(e)}"
# NOTE: Gradio derives the MCP tool schema from this function's type hints and
# docstring, so the docstring below is written for tool consumers. Keep each
# argument description on a single "name (type): text" line.
def gradio_interface_handler(video_url_or_id: str, api_key: str, language: str = 'en') -> str:
    """
    Fetch the transcript of a YouTube video as plain text using the YouTube Data API v3.

    Args:
        video_url_or_id (str): The YouTube video URL or its 11-character ID.
        api_key (str): The YouTube Data API v3 key.
        language (str): The preferred ISO 639-1 language code for the transcript (e.g., 'en', 'es'). Defaults to 'en'.

    Returns:
        str: The fetched transcript or an error message.
    """
    # MCP clients may omit arguments entirely, so guard against None as well
    # as blank strings before calling .strip().
    if not video_url_or_id or not video_url_or_id.strip():
        return "Error: YouTube Video URL or ID ('video_url_or_id') input is empty. Please provide a valid URL or ID."
    if not api_key or not api_key.strip():
        return "Error: YouTube API Key ('api_key') input is empty. Please provide your API key."

    language_to_use = language.strip().lower() if language and language.strip() else 'en'
    return get_youtube_transcript_official_api(
        video_url_or_id.strip(), api_key.strip(), language_to_use)
# Define Gradio input components.
# The 'label' is for the UI, and 'placeholder' provides a hint.
# The descriptions for the MCP tool arguments are derived from the docstring of 'gradio_interface_handler'.
# The order of this list must match the positional parameters of the handler.
inputs = [
    gr.Textbox(
        label="YouTube Video URL or ID",
        placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ or dQw4w9WgXcQ"
    ),
    gr.Textbox(
        label="YouTube Data API Key",
        type="password",  # mask the key in the browser
        placeholder="Enter your API key (e.g., AIzaSy...)"
    ),
    gr.Textbox(
        label="Preferred Language Code",
        value="en",  # Default language
        placeholder="e.g., en, es, fr, de"
    )
]

# Define Gradio output component.
# The 'label' is for the UI. The description for the MCP tool output is derived from the return type hint and docstring of 'gradio_interface_handler'.
outputs = gr.Textbox(
    label="Transcript Output",
    lines=15,
    show_copy_button=True
)

# Create the Gradio interface (it is launched from the __main__ guard).
demo = gr.Interface(
    fn=gradio_interface_handler,  # The function to wrap, with type hints and docstrings
    inputs=inputs,
    outputs=outputs,
    title="YouTube Video Transcript Retriever (MCP Enabled)",
    description=(  # This is the main description for the Gradio UI and can also provide context for the tool.
        "Enter a YouTube video URL/ID, your YouTube Data API Key, and a preferred language code "
        "to fetch the video transcript. This interface also exposes an MCP tool for programmatic access. "
        "The MCP tool's argument descriptions are generated from the function's docstring."
    ),
    # 'flagging_mode' replaces the deprecated 'allow_flagging' keyword in the
    # Gradio 5.x releases that provide mcp_server support.
    flagging_mode='never',
    # Example rows follow the same column order as 'inputs'; the API key is a
    # placeholder the user must replace.
    examples=[
        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ", "YOUR_API_KEY_HERE", "en"],
        ["Mdcw3_s2T_s", "YOUR_API_KEY_HERE", "en"],
        ["https://www.youtube.com/watch?v=rokGy0huYEA", "YOUR_API_KEY_HERE", "ja"]
    ],
    article=(
        "**Using the Web Interface:**\n"
        "1. Obtain a [YouTube Data API v3 key](https://developers.google.com/youtube/v3/getting-started).\n"
        "2. Ensure the YouTube Data API v3 is enabled for your project in Google Cloud Console.\n"
        "3. Paste the video URL/ID, your API key, and desired language code into the respective fields.\n"
        "4. Click 'Submit' to retrieve the transcript.\n\n"
        "**MCP Server Information:**\n"
        "When launched with `mcp_server=True`, Gradio also starts an MCP server.\n"
        "- The tool schema (including argument descriptions from the function's docstring) can typically be found at `/gradio_api/mcp/schema`.\n"
        "- The MCP server endpoint is usually at `/gradio_api/mcp/sse`.\n"
        "This allows AI models and other MCP clients to use the transcript retrieval functionality programmatically."
    )
)
if __name__ == '__main__':
    # Print a short startup banner, then serve the UI together with the MCP
    # server endpoint.
    startup_messages = (
        "Gradio app starting...",
        "MCP Server integration is enabled via mcp_server=True.",
        "Ensure 'gradio[mcp]' is installed if you encounter issues related to MCP.",
    )
    for message in startup_messages:
        print(message)
    demo.launch(mcp_server=True)