Spaces:

kirbah
/

mcp-youtube-transcript

Running

App Files Files Community

mcp-youtube-transcript / app via API.py

kirbah

Better descriptions

4955f2d 3 days ago

raw

history blame contribute delete

10.5 kB

	import re
	import gradio as gr
	from googleapiclient.discovery import build
	from googleapiclient.errors import HttpError

	# Re-used function to extract video ID


	def _extract_video_id(youtube_url: str) -> str \| None:
	"""
	Extracts the YouTube video ID from a URL.
	Handles standard, shortened, embed URLs, and direct ID.
	"""
	# Standard URL: https://www.youtube.com/watch?v=VIDEO_ID
	match = re.search(r"watch\?v=([^&]+)", youtube_url)
	if match:
	return match.group(1)

	# Shortened URL: https://youtu.be/VIDEO_ID
	match = re.search(r"youtu\.be/([^?&]+)", youtube_url)
	if match:
	return match.group(1)

	# Embed URL: https://www.youtube.com/embed/VIDEO_ID
	match = re.search(r"youtube\.com/embed/([^?&]+)", youtube_url)
	if match:
	return match.group(1)

	# Video ID directly passed
	if re.fullmatch(r"^[a-zA-Z0-9_-]{11}$", youtube_url):
	return youtube_url
	return None


	def _parse_srt_to_text(srt_content: str) -> str:
	"""
	Parses SRT formatted string to extract plain text.
	Removes timestamps, sequence numbers, and basic HTML formatting.
	"""
	text_lines = []
	lines = srt_content.splitlines()
	for line in lines:
	if not line.strip() or line.strip().isdigit() or '-->' in line:
	continue
	line_text = re.sub(r'<[^>]+>', '', line)
	text_lines.append(line_text.strip())
	return " ".join(text_lines)


	def get_youtube_transcript_official_api(video_url_or_id: str, api_key: str, target_language: str = 'en') -> str:
	    """
	    Retrieves the transcript for a YouTube video using the official YouTube Data API v3.
	    This function is intended to be exposed as an MCP tool.

	    The video's caption tracks are listed, the track whose language equals
	    ``target_language`` is selected (falling back to an 'en' track when the
	    requested language is missing), downloaded in SRT format and reduced to
	    plain text. Failures are never raised; they are reported as a
	    human-readable string.

	    Args:
	        video_url_or_id (str): YouTube video URL or 11-character video ID.
	        api_key (str): Your YouTube Data API v3 key.
	        target_language (str): Preferred language code for the transcript (e.g., 'en', 'es'). Defaults to 'en'.

	    Returns:
	        str: The concatenated transcript text or an error message.
	    """
	    video_id = _extract_video_id(video_url_or_id)
	    if not video_id:
	        return f"Error: Invalid YouTube video URL or ID: '{video_url_or_id}'. Could not extract a valid video ID."

	    if not api_key or not api_key.strip():
	        return "Error: YouTube Data API Key is missing. Please provide a valid API key for the 'api_key' argument."

	    # A blank or missing language means "use the documented default".
	    wanted_language = (target_language or '').strip().lower() or 'en'

	    try:
	        # Strip the key: whitespace picked up by copy/paste would otherwise be
	        # sent along as part of the key.
	        youtube = build('youtube', 'v3', developerKey=api_key.strip())
	    except Exception as e:
	        return f"Error: Could not build YouTube API client. Check library installation. Details: {str(e)}"

	    try:
	        caption_request = youtube.captions().list(
	            part="snippet",
	            videoId=video_id
	        )
	        caption_response = caption_request.execute()

	        caption_id_to_download = None
	        found_lang_for_download = None
	        available_langs_details = []

	        # First pass: exact (case-insensitive) match on the requested language.
	        # The details list is only reported when nothing matched, in which
	        # case this loop ran to completion and the list is exhaustive.
	        for item in caption_response.get('items', []):
	            lang_code = item['snippet']['language']
	            lang_name = item['snippet'].get('name', 'N/A')
	            track_kind = item['snippet'].get('trackKind', 'N/A')
	            available_langs_details.append(
	                f"{lang_code} (Name: '{lang_name}', Type: {track_kind})")

	            if lang_code.lower() == wanted_language:
	                caption_id_to_download = item['id']
	                found_lang_for_download = lang_code
	                break

	        # Second pass: fall back to an English track.
	        if not caption_id_to_download and wanted_language != 'en':
	            for item in caption_response.get('items', []):
	                lang_code = item['snippet']['language']
	                if lang_code.lower() == 'en':
	                    caption_id_to_download = item['id']
	                    found_lang_for_download = lang_code
	                    break

	        if not caption_id_to_download:
	            available_langs_str = "\n - ".join(
	                available_langs_details) if available_langs_details else "None listed (captions might be disabled, non-existent, or API access restricted)"
	            return (f"Error: No suitable caption track found for language '{target_language}' "
	                    f"(or 'en' fallback) for video ID '{video_id}'.\n"
	                    f"Available caption tracks:\n - {available_langs_str}")

	        # NOTE(review): the captions.download endpoint is documented as requiring
	        # OAuth 2.0 authorization; a plain API key may be rejected here, which
	        # then surfaces through the HttpError branch below - confirm.
	        download_request = youtube.captions().download(
	            id=caption_id_to_download,
	            tfmt='srt'
	        )
	        srt_transcript = download_request.execute()

	        # The download is not JSON, so the client may hand back the raw response
	        # body as bytes; the SRT parser works on text.
	        if isinstance(srt_transcript, (bytes, bytearray)):
	            srt_transcript = srt_transcript.decode('utf-8', errors='replace')

	        plain_text_transcript = _parse_srt_to_text(srt_transcript)

	        if not plain_text_transcript.strip():
	            return (f"Notice: Transcript for video ID '{video_id}' (Language: {found_lang_for_download}) "
	                    "was downloaded but appears empty after parsing. The SRT file might be malformed or contain no text.")

	        return plain_text_transcript

	    except HttpError as e:
	        error_content_bytes = e.content
	        error_details = "No additional details in error content."
	        if error_content_bytes:
	            try:
	                error_details = error_content_bytes.decode('utf-8')
	            except UnicodeDecodeError:
	                error_details = "Error content could not be decoded (non-UTF-8)."

	        status_code = e.resp.status

	        if status_code == 403:
	            # Compare lowercase against lowercase: the API reason is spelled
	            # "quotaExceeded", which can never be found in a lowercased string.
	            lowered_details = error_details.lower()
	            if "quotaexceeded" in lowered_details or "daily limit exceeded" in lowered_details:
	                return f"API Error (403): YouTube API quota exceeded. Details: {error_details}"
	            return (f"API Error (403): Forbidden. Check API Key ('api_key'), YouTube Data API v3 enablement, or video owner restrictions for video_id='{video_id}'. Details: {error_details}")
	        elif status_code == 404:
	            return (f"API Error (404): Not Found. Video ID '{video_id}' ('video_url_or_id') might be incorrect, private/deleted, or caption track missing. Details: {error_details}")
	        else:
	            return f"API Error ({status_code}): An API error occurred while processing video_id='{video_id}'. Details: {error_details}"

	    except Exception as e:
	        # Last-resort boundary: the result is shown to an end user / MCP client,
	        # so report the failure as text instead of raising.
	        return f"Unexpected Error processing video_id='{video_id}': {type(e).__name__} - {str(e)}"


	def gradio_interface_handler(video_url_or_id: str, api_key: str, language: str) -> str:
	    """
	    Fetch the transcript of a YouTube video as plain text.

	    Args:
	        video_url_or_id (str): The YouTube video URL or its 11-character ID.
	        api_key (str): The YouTube Data API v3 key.
	        language (str): The preferred ISO 639-1 language code for the transcript (e.g., 'en', 'es'). Defaults to 'en'.

	    Returns:
	        str: The fetched transcript or an error message.
	    """
	    # The type hints and the docstring above help Gradio generate the MCP tool
	    # schema, so the docstring holds only text meant for tool users;
	    # implementation notes belong in comments like this one.

	    # Treat a missing value (None) like an empty field instead of failing
	    # with AttributeError on .strip().
	    video_ref = (video_url_or_id or "").strip()
	    if not video_ref:
	        return "Error: YouTube Video URL or ID ('video_url_or_id') input is empty. Please provide a valid URL or ID."

	    key = (api_key or "").strip()
	    if not key:
	        return "Error: YouTube API Key ('api_key') input is empty. Please provide your API key."

	    language_to_use = (language or "").strip().lower() or 'en'

	    # Forward the cleaned values so that padding around a pasted URL, ID or
	    # key cannot make the lookup fail.
	    return get_youtube_transcript_official_api(video_ref, key, language_to_use)


	# Input widgets, listed in the same order as the parameters of
	# 'gradio_interface_handler' (video, API key, language).
	# 'label' is the caption shown in the UI and 'placeholder' is the hint text;
	# the MCP tool argument descriptions come from the handler's docstring instead.
	video_box = gr.Textbox(
	    label="YouTube Video URL or ID",
	    placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ or dQw4w9WgXcQ",
	)
	api_key_box = gr.Textbox(
	    label="YouTube Data API Key",
	    type="password",  # password-type textbox for the key
	    placeholder="Enter your API key (e.g., AIzaSy...)",
	)
	language_box = gr.Textbox(
	    label="Preferred Language Code",
	    value="en",  # pre-filled default language
	    placeholder="e.g., en, es, fr, de",
	)
	inputs = [video_box, api_key_box, language_box]

	# Output widget: a 15-line text area with a copy button that shows either the
	# transcript or the error message returned by 'gradio_interface_handler'.
	# 'label' is UI-only; the MCP description of the tool output is taken from the
	# handler's docstring (and its return annotation, where present).
	outputs = gr.Textbox(label="Transcript Output", lines=15, show_copy_button=True)

	# Build the Gradio interface (it is launched in the __main__ guard below).
	demo = gr.Interface(
	    fn=gradio_interface_handler,  # The function to wrap, with type hints and docstrings
	    inputs=inputs,
	    outputs=outputs,
	    title="YouTube Video Transcript Retriever (MCP Enabled)",
	    description=(  # Main description for the Gradio UI; also gives context for the tool.
	        "Enter a YouTube video URL/ID, your YouTube Data API Key, and a preferred language code "
	        "to fetch the video transcript. This interface also exposes an MCP tool for programmatic access. "
	        "The MCP tool's argument descriptions are generated from the function's docstring."
	    ),
	    # 'flagging_mode' is the Gradio 5 name of the deprecated 'allow_flagging'
	    # argument; Gradio 5 is already required by launch(mcp_server=True).
	    flagging_mode='never',
	    examples=[
	        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ", "YOUR_API_KEY_HERE", "en"],
	        ["Mdcw3_s2T_s", "YOUR_API_KEY_HERE", "en"],
	        ["https://www.youtube.com/watch?v=rokGy0huYEA", "YOUR_API_KEY_HERE", "ja"]
	    ],
	    # The examples carry a placeholder API key, so running them can only yield
	    # an error message. Keep them as input presets and never pre-compute them.
	    # NOTE(review): example caching is reportedly on by default on Hugging Face
	    # Spaces - confirm against the deployed Gradio version.
	    cache_examples=False,
	    article=(
	        "Using the Web Interface:\n"
	        "1. Obtain a [YouTube Data API v3 key](https://developers.google.com/youtube/v3/getting-started).\n"
	        "2. Ensure the YouTube Data API v3 is enabled for your project in Google Cloud Console.\n"
	        "3. Paste the video URL/ID, your API key, and desired language code into the respective fields.\n"
	        "4. Click 'Submit' to retrieve the transcript.\n\n"
	        "MCP Server Information:\n"
	        "When launched with `mcp_server=True`, Gradio also starts an MCP server.\n"
	        "- The tool schema (including argument descriptions from the function's docstring) can typically be found at `/gradio_api/mcp/schema`.\n"
	        "- The MCP server endpoint is usually at `/gradio_api/mcp/sse`.\n"
	        "This allows AI models and other MCP clients to use the transcript retrieval functionality programmatically."
	    )
	)

	if __name__ == '__main__':
	    # Print a short startup banner, then start the web UI together with the
	    # MCP server.
	    for banner_line in (
	        "Gradio app starting...",
	        "MCP Server integration is enabled via mcp_server=True.",
	        "Ensure 'gradio[mcp]' is installed if you encounter issues related to MCP.",
	    ):
	        print(banner_line)
	    demo.launch(mcp_server=True)