# Source: Hugging Face Space "kirbah/mcp-youtube-transcript" (status: Running)
# File size: 10,545 Bytes; revision 4955f2d

import re
import gradio as gr
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# Re-used function to extract video ID


def _extract_video_id(youtube_url: str) -> str | None:
    """
    Extracts the YouTube video ID from a URL.
    Handles standard, shortened, embed URLs, and direct ID.
    """
    # Standard URL: https://www.youtube.com/watch?v=VIDEO_ID
    match = re.search(r"watch\?v=([^&]+)", youtube_url)
    if match:
        return match.group(1)

    # Shortened URL: https://youtu.be/VIDEO_ID
    match = re.search(r"youtu\.be/([^?&]+)", youtube_url)
    if match:
        return match.group(1)

    # Embed URL: https://www.youtube.com/embed/VIDEO_ID
    match = re.search(r"youtube\.com/embed/([^?&]+)", youtube_url)
    if match:
        return match.group(1)

    # Video ID directly passed
    if re.fullmatch(r"^[a-zA-Z0-9_-]{11}$", youtube_url):
        return youtube_url
    return None


def _parse_srt_to_text(srt_content: str) -> str:
    """
    Parses SRT formatted string to extract plain text.
    Removes timestamps, sequence numbers, and basic HTML formatting.
    """
    text_lines = []
    lines = srt_content.splitlines()
    for line in lines:
        if not line.strip() or line.strip().isdigit() or '-->' in line:
            continue
        line_text = re.sub(r'<[^>]+>', '', line)
        text_lines.append(line_text.strip())
    return " ".join(text_lines)


def _select_caption_track(caption_items: list, target_language: str) -> tuple[str | None, str | None]:
    """
    Picks the caption track that best matches the requested language.

    Preference order (all comparisons are case-insensitive):
      1. exact match on the requested code (e.g. 'pt-BR')
      2. the bare primary subtag of the request (e.g. 'pt')
      3. any regional variant of the same language (e.g. 'pt-PT')
      4. English ('en', then any 'en-*') when the request was not English

    Args:
        caption_items (list): The 'items' list of a captions.list response.
        target_language (str): Requested language code (non-empty).

    Returns:
        tuple: (caption_id, language_code) of the chosen track, or
        (None, None) when no track qualifies.
    """
    wanted = target_language.lower()
    wanted_primary = wanted.split('-')[0]

    matchers = [
        lambda code: code == wanted,
        lambda code: code == wanted_primary,
        lambda code: code.split('-')[0] == wanted_primary,
    ]
    if wanted_primary != 'en':
        matchers.append(lambda code: code == 'en')
        matchers.append(lambda code: code.split('-')[0] == 'en')

    for is_match in matchers:
        for item in caption_items:
            lang_code = item['snippet']['language']
            if is_match(lang_code.lower()):
                return item['id'], lang_code
    return None, None


def get_youtube_transcript_official_api(video_url_or_id: str, api_key: str, target_language: str = 'en') -> str:
    """
    Retrieves the transcript for a YouTube video using the official YouTube Data API v3.
    This function is intended to be exposed as an MCP tool.

    The function never raises for API or input problems; every failure is
    reported as a human-readable string starting with "Error", "API Error",
    "Unexpected Error" or "Notice".

    NOTE(review): the YouTube Data API documentation lists OAuth 2.0
    authorization for captions.list / captions.download; an API key alone may
    be rejected with 401/403 for videos the caller does not own - confirm.

    Args:
        video_url_or_id (str): YouTube video URL or 11-character video ID.
        api_key (str): Your YouTube Data API v3 key.
        target_language (str): Preferred language code for the transcript (e.g., 'en', 'es'). Defaults to 'en'.
            Regional variants of the same language and English are used as fallbacks.

    Returns:
        str: The concatenated transcript text or an error message.
    """
    video_id = _extract_video_id(video_url_or_id)
    if not video_id:
        return f"Error: Invalid YouTube video URL or ID: '{video_url_or_id}'. Could not extract a valid video ID."

    if not api_key or not api_key.strip():
        return "Error: YouTube Data API Key is missing. Please provide a valid API key for the 'api_key' argument."

    # A missing or blank language falls back to the documented default.
    target_language = (target_language or '').strip() or 'en'

    try:
        youtube = build('youtube', 'v3', developerKey=api_key.strip())
    except Exception as e:
        return f"Error: Could not build YouTube API client. Check library installation. Details: {str(e)}"

    try:
        caption_response = youtube.captions().list(
            part="snippet",
            videoId=video_id
        ).execute()
        caption_items = caption_response.get('items', [])

        caption_id_to_download, found_lang_for_download = _select_caption_track(
            caption_items, target_language)

        if not caption_id_to_download:
            available_langs_details = [
                f"{item['snippet']['language']} "
                f"(Name: '{item['snippet'].get('name', 'N/A')}', "
                f"Type: {item['snippet'].get('trackKind', 'N/A')})"
                for item in caption_items
            ]
            available_langs_str = "\n - ".join(
                available_langs_details) if available_langs_details else "None listed (captions might be disabled, non-existent, or API access restricted)"
            return (f"Error: No suitable caption track found for language '{target_language}' "
                    f"(or 'en' fallback) for video ID '{video_id}'.\n"
                    f"Available caption tracks:\n - {available_langs_str}")

        srt_transcript = youtube.captions().download(
            id=caption_id_to_download,
            tfmt='srt'
        ).execute()

        # The client may return the raw response body as bytes; the SRT
        # parser works on text, so decode before handing it over.
        if isinstance(srt_transcript, (bytes, bytearray)):
            srt_transcript = srt_transcript.decode('utf-8-sig', errors='replace')

        plain_text_transcript = _parse_srt_to_text(srt_transcript)

        if not plain_text_transcript.strip():
            return (f"Notice: Transcript for video ID '{video_id}' (Language: {found_lang_for_download}) "
                    "was downloaded but appears empty after parsing. The SRT file might be malformed or contain no text.")

        return plain_text_transcript

    except HttpError as e:
        error_content_bytes = e.content
        error_details = "No additional details in error content."
        if error_content_bytes:
            try:
                error_details = error_content_bytes.decode('utf-8')
            except UnicodeDecodeError:
                error_details = "Error content could not be decoded (non-UTF-8)."

        status_code = e.resp.status

        if status_code == 403:
            # Compare lowercase against lowercase: the previous mixed-case
            # needle "quotaExceeded" could never match the lowered text.
            lowered_details = error_details.lower()
            quota_markers = ("quotaexceeded", "dailylimitexceeded", "daily limit exceeded")
            if any(marker in lowered_details for marker in quota_markers):
                return f"API Error (403): YouTube API quota exceeded. Details: {error_details}"
            return (f"API Error (403): Forbidden. Check API Key ('api_key'), YouTube Data API v3 enablement, or video owner restrictions for video_id='{video_id}'. Details: {error_details}")
        elif status_code == 404:
            return (f"API Error (404): Not Found. Video ID '{video_id}' ('video_url_or_id') might be incorrect, private/deleted, or caption track missing. Details: {error_details}")
        else:
            return f"API Error ({status_code}): An API error occurred while processing video_id='{video_id}'. Details: {error_details}"

    except Exception as e:
        # Last-resort boundary: this is a tool entry point, so report the
        # failure as text instead of propagating a traceback to the client.
        return f"Unexpected Error processing video_id='{video_id}': {type(e).__name__} - {str(e)}"


def gradio_interface_handler(video_url_or_id: str, api_key: str, language: str) -> str:
    """
    Fetch the transcript of a YouTube video as plain text using the YouTube Data API v3.

    Args:
        video_url_or_id (str): The YouTube video URL or its 11-character ID.
        api_key (str): The YouTube Data API v3 key.
        language (str): The preferred ISO 639-1 language code for the transcript (e.g., 'en', 'es'). Defaults to 'en'.

    Returns:
        str: The fetched transcript or an error message.
    """
    # The type hints and the docstring above feed the generated MCP tool
    # schema (per the comments next to the Gradio component definitions), so
    # the docstring is worded for tool consumers and each argument stays on
    # one line.

    # Programmatic clients may send null for an omitted argument; treat
    # that the same as an empty text box instead of raising on `.strip()`.
    video_url_or_id = (video_url_or_id or "").strip()
    api_key = (api_key or "").strip()

    if not video_url_or_id:
        return "Error: YouTube Video URL or ID ('video_url_or_id') input is empty. Please provide a valid URL or ID."
    if not api_key:
        return "Error: YouTube API Key ('api_key') input is empty. Please provide your API key."

    language_to_use = (language or "").strip().lower() or 'en'

    return get_youtube_transcript_official_api(video_url_or_id, api_key, language_to_use)


# Input widgets for the interface, listed in the same order as the
# parameters of 'gradio_interface_handler'.
# 'label' is what the UI shows and 'placeholder' is the greyed-out hint; the
# MCP argument descriptions come from the handler's docstring, not from here.
_video_box = gr.Textbox(
    placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ or dQw4w9WgXcQ",
    label="YouTube Video URL or ID",
)
_api_key_box = gr.Textbox(
    placeholder="Enter your API key (e.g., AIzaSy...)",
    label="YouTube Data API Key",
    type="password",  # mask the key while it is typed
)
_language_box = gr.Textbox(
    placeholder="e.g., en, es, fr, de",
    label="Preferred Language Code",
    value="en",  # pre-filled default language
)
inputs = [_video_box, _api_key_box, _language_box]

# Single output widget: a 15-line text area with a copy button.
# 'label' is UI-only; the MCP description of the tool output comes from the
# return type hint and docstring of 'gradio_interface_handler'.
outputs = gr.Textbox(show_copy_button=True, lines=15, label="Transcript Output")

# Create the Gradio interface (it is launched from the __main__ guard below).
demo = gr.Interface(
    fn=gradio_interface_handler,  # The function to wrap, with type hints and docstrings
    inputs=inputs,
    outputs=outputs,
    title="YouTube Video Transcript Retriever (MCP Enabled)",
    description=(  # This is the main description for the Gradio UI and can also provide context for the tool.
        "Enter a YouTube video URL/ID, your YouTube Data API Key, and a preferred language code "
        "to fetch the video transcript. This interface also exposes an MCP tool for programmatic access. "
        "The MCP tool's argument descriptions are generated from the function's docstring."
    ),
    # 'allow_flagging' is deprecated in Gradio 5 (the release line that
    # provides 'mcp_server'); 'flagging_mode' is its replacement.
    flagging_mode='never',
    # The API key column is a placeholder on purpose: users must supply
    # their own key before an example will return a transcript.
    examples=[
        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ", "YOUR_API_KEY_HERE", "en"],
        ["Mdcw3_s2T_s", "YOUR_API_KEY_HERE", "en"],
        ["https://www.youtube.com/watch?v=rokGy0huYEA", "YOUR_API_KEY_HERE", "ja"]
    ],
    article=(
        "**Using the Web Interface:**\n"
        "1. Obtain a [YouTube Data API v3 key](https://developers.google.com/youtube/v3/getting-started).\n"
        "2. Ensure the YouTube Data API v3 is enabled for your project in Google Cloud Console.\n"
        "3. Paste the video URL/ID, your API key, and desired language code into the respective fields.\n"
        "4. Click 'Submit' to retrieve the transcript.\n\n"
        "**MCP Server Information:**\n"
        "When launched with `mcp_server=True`, Gradio also starts an MCP server.\n"
        "- The tool schema (including argument descriptions from the function's docstring) can typically be found at `/gradio_api/mcp/schema`.\n"
        "- The MCP server endpoint is usually at `/gradio_api/mcp/sse`.\n"
        "This allows AI models and other MCP clients to use the transcript retrieval functionality programmatically."
    )
)

if __name__ == '__main__':
    # Print a short startup banner, then start the web UI together with the
    # MCP server.
    startup_banner = (
        "Gradio app starting...",
        "MCP Server integration is enabled via mcp_server=True.",
        "Ensure 'gradio[mcp]' is installed if you encounter issues related to MCP.",
    )
    for banner_line in startup_banner:
        print(banner_line)
    demo.launch(mcp_server=True)