File size: 10,545 Bytes
4955f2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
import re
import gradio as gr
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# Re-used function to extract video ID


def _extract_video_id(youtube_url: str) -> str | None:
    """
    Extracts the YouTube video ID from a URL.
    Handles standard, shortened, embed URLs, and direct ID.
    """
    # Standard URL: https://www.youtube.com/watch?v=VIDEO_ID
    match = re.search(r"watch\?v=([^&]+)", youtube_url)
    if match:
        return match.group(1)

    # Shortened URL: https://youtu.be/VIDEO_ID
    match = re.search(r"youtu\.be/([^?&]+)", youtube_url)
    if match:
        return match.group(1)

    # Embed URL: https://www.youtube.com/embed/VIDEO_ID
    match = re.search(r"youtube\.com/embed/([^?&]+)", youtube_url)
    if match:
        return match.group(1)

    # Video ID directly passed
    if re.fullmatch(r"^[a-zA-Z0-9_-]{11}$", youtube_url):
        return youtube_url
    return None


def _parse_srt_to_text(srt_content: str) -> str:
    """
    Parses SRT formatted string to extract plain text.
    Removes timestamps, sequence numbers, and basic HTML formatting.
    """
    text_lines = []
    lines = srt_content.splitlines()
    for line in lines:
        if not line.strip() or line.strip().isdigit() or '-->' in line:
            continue
        line_text = re.sub(r'<[^>]+>', '', line)
        text_lines.append(line_text.strip())
    return " ".join(text_lines)


def get_youtube_transcript_official_api(video_url_or_id: str, api_key: str, target_language: str = 'en') -> str:
    """
    Retrieves the transcript for a YouTube video using the official YouTube Data API v3.
    This function is intended to be exposed as an MCP tool.

    The caption tracks of the video are listed, the first track whose language
    equals ``target_language`` (case-insensitive) is selected, and the first
    'en' track is used as a fallback. The selected track is downloaded as SRT
    and reduced to plain text. Every failure is reported as a returned
    message; this function does not raise.

    Args:
        video_url_or_id (str): YouTube video URL or 11-character video ID.
        api_key (str): Your YouTube Data API v3 key.
        target_language (str): Preferred language code for the transcript (e.g., 'en', 'es'). Defaults to 'en'.
            A missing or blank value is treated as 'en'.

    Returns:
        str: The concatenated transcript text or an error message.
    """
    # NOTE(review): the YouTube Data API documents captions.list/download as
    # requiring OAuth 2.0 authorization; an API key alone may be rejected
    # with 401/403 - confirm against the API reference.
    video_id = _extract_video_id(video_url_or_id)
    if not video_id:
        return f"Error: Invalid YouTube video URL or ID: '{video_url_or_id}'. Could not extract a valid video ID."

    if not api_key or not api_key.strip():
        return "Error: YouTube Data API Key is missing. Please provide a valid API key for the 'api_key' argument."
    api_key = api_key.strip()

    # Keep the caller's spelling for messages; compare on the lower-cased form.
    requested_language = (target_language or 'en').strip() or 'en'
    wanted_language = requested_language.lower()

    try:
        youtube = build('youtube', 'v3', developerKey=api_key)
    except Exception as e:
        return f"Error: Could not build YouTube API client. Check library installation. Details: {str(e)}"

    try:
        caption_response = youtube.captions().list(
            part="snippet",
            videoId=video_id
        ).execute()

        target_track = None   # (caption id, language code) of the requested language
        english_track = None  # (caption id, language code) of the first 'en' track
        available_langs_details = []

        for item in caption_response.get('items', []):
            snippet = item['snippet']
            lang_code = snippet['language']
            lang_name = snippet.get('name', 'N/A')
            track_kind = snippet.get('trackKind', 'N/A')
            available_langs_details.append(
                f"{lang_code} (Name: '{lang_name}', Type: {track_kind})")

            normalized_code = lang_code.lower()
            if english_track is None and normalized_code == 'en':
                english_track = (item['id'], lang_code)
            if normalized_code == wanted_language:
                target_track = (item['id'], lang_code)
                # The details list is only reported when nothing matches,
                # so scanning can stop at the first hit.
                break

        selected_track = target_track or english_track

        if not selected_track:
            available_langs_str = "\n - ".join(
                available_langs_details) if available_langs_details else "None listed (captions might be disabled, non-existent, or API access restricted)"
            return (f"Error: No suitable caption track found for language '{requested_language}' "
                    f"(or 'en' fallback) for video ID '{video_id}'.\n"
                    f"Available caption tracks:\n - {available_langs_str}")

        caption_id_to_download, found_lang_for_download = selected_track

        srt_transcript = youtube.captions().download(
            id=caption_id_to_download,
            tfmt='srt'
        ).execute()

        # The download call hands back the raw track body; decode it here so
        # the text parser always receives a str.
        if isinstance(srt_transcript, (bytes, bytearray)):
            srt_transcript = bytes(srt_transcript).decode('utf-8', errors='replace')

        plain_text_transcript = _parse_srt_to_text(srt_transcript)

        if not plain_text_transcript.strip():
            return (f"Notice: Transcript for video ID '{video_id}' (Language: {found_lang_for_download}) "
                    "was downloaded but appears empty after parsing. The SRT file might be malformed or contain no text.")

        return plain_text_transcript

    except HttpError as e:
        error_content_bytes = e.content
        error_details = "No additional details in error content."
        if error_content_bytes:
            try:
                error_details = error_content_bytes.decode('utf-8')
            except UnicodeDecodeError:
                error_details = "Error content could not be decoded (non-UTF-8)."

        status_code = e.resp.status

        if status_code == 403:
            # Both needles must be lower-case because the haystack is
            # lower-cased; a mixed-case needle could never match.
            details_lower = error_details.lower()
            if "quotaexceeded" in details_lower or "daily limit exceeded" in details_lower:
                return f"API Error (403): YouTube API quota exceeded. Details: {error_details}"
            return (f"API Error (403): Forbidden. Check API Key ('api_key'), YouTube Data API v3 enablement, or video owner restrictions for video_id='{video_id}'. Details: {error_details}")
        elif status_code == 404:
            return (f"API Error (404): Not Found. Video ID '{video_id}' ('video_url_or_id') might be incorrect, private/deleted, or caption track missing. Details: {error_details}")
        else:
            return f"API Error ({status_code}): An API error occurred while processing video_id='{video_id}'. Details: {error_details}"

    except Exception as e:
        # Boundary handler: the result is shown to a UI/MCP client, so any
        # unexpected failure is reported as text instead of propagating.
        return f"Unexpected Error processing video_id='{video_id}': {type(e).__name__} - {str(e)}"


def gradio_interface_handler(video_url_or_id: str, api_key: str, language: str) -> str:
    """
    Fetch the transcript of a YouTube video as plain text.

    Validates and trims the raw UI/MCP inputs, falls back to 'en' when no
    language is given, and delegates to get_youtube_transcript_official_api.
    Missing (None) or blank inputs produce an error message rather than an
    exception.

    Args:
        video_url_or_id (str): The YouTube video URL or its 11-character ID.
        api_key (str): The YouTube Data API v3 key.
        language (str): The preferred ISO 639-1 language code for the transcript (e.g., 'en', 'es'). Defaults to 'en'.

    Returns:
        str: The fetched transcript or an error message.
    """
    # Trim once and forward the trimmed values: pasted URLs and keys often
    # carry stray whitespace that would otherwise reach the API unchanged.
    video_reference = (video_url_or_id or "").strip()
    if not video_reference:
        return "Error: YouTube Video URL or ID ('video_url_or_id') input is empty. Please provide a valid URL or ID."

    cleaned_key = (api_key or "").strip()
    if not cleaned_key:
        return "Error: YouTube API Key ('api_key') input is empty. Please provide your API key."

    language_to_use = (language or "").strip().lower() or 'en'

    return get_youtube_transcript_official_api(video_reference, cleaned_key, language_to_use)


# Input widgets for the web form: video reference, API key, language code.
# 'label' and 'placeholder' are UI-only; per the handler's docstring, the MCP
# argument descriptions come from 'gradio_interface_handler' itself.
inputs = [
    gr.Textbox(
        placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ or dQw4w9WgXcQ",
        label="YouTube Video URL or ID",
    ),
    gr.Textbox(
        placeholder="Enter your API key (e.g., AIzaSy...)",
        type="password",
        label="YouTube Data API Key",
    ),
    gr.Textbox(
        placeholder="e.g., en, es, fr, de",
        value="en",  # pre-filled default language
        label="Preferred Language Code",
    ),
]

# Single output widget: a multi-line text area, with a copy button, that
# shows whatever string the handler returns (transcript or error message).
outputs = gr.Textbox(
    show_copy_button=True,
    lines=15,
    label="Transcript Output",
)

# Build the Gradio interface. It is only constructed here; launching happens
# in the __main__ guard at the bottom of the file.
demo = gr.Interface(
    fn=gradio_interface_handler,  # The function to wrap, with type hints and docstrings
    inputs=inputs,
    outputs=outputs,
    title="YouTube Video Transcript Retriever (MCP Enabled)",
    description=(  # This is the main description for the Gradio UI and can also provide context for the tool.
        "Enter a YouTube video URL/ID, your YouTube Data API Key, and a preferred language code "
        "to fetch the video transcript. This interface also exposes an MCP tool for programmatic access. "
        "The MCP tool's argument descriptions are generated from the function's docstring."
    ),
    # 'flagging_mode' replaces the deprecated 'allow_flagging' keyword in the
    # Gradio releases that provide launch(mcp_server=True).
    flagging_mode='never',
    # The API key column is a placeholder; users must substitute their own key.
    examples=[
        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ", "YOUR_API_KEY_HERE", "en"],
        ["Mdcw3_s2T_s", "YOUR_API_KEY_HERE", "en"],
        ["https://www.youtube.com/watch?v=rokGy0huYEA", "YOUR_API_KEY_HERE", "ja"]
    ],
    article=(
        "**Using the Web Interface:**\n"
        "1. Obtain a [YouTube Data API v3 key](https://developers.google.com/youtube/v3/getting-started).\n"
        "2. Ensure the YouTube Data API v3 is enabled for your project in Google Cloud Console.\n"
        "3. Paste the video URL/ID, your API key, and desired language code into the respective fields.\n"
        "4. Click 'Submit' to retrieve the transcript.\n\n"
        "**MCP Server Information:**\n"
        "When launched with `mcp_server=True`, Gradio also starts an MCP server.\n"
        "- The tool schema (including argument descriptions from the function's docstring) can typically be found at `/gradio_api/mcp/schema`.\n"
        "- The MCP server endpoint is usually at `/gradio_api/mcp/sse`.\n"
        "This allows AI models and other MCP clients to use the transcript retrieval functionality programmatically."
    )
)

if __name__ == '__main__':
    # Print the three-line startup banner, then start the app with the MCP
    # server enabled.
    print(
        "Gradio app starting...",
        "MCP Server integration is enabled via mcp_server=True.",
        "Ensure 'gradio[mcp]' is installed if you encounter issues related to MCP.",
        sep="\n",
    )
    demo.launch(mcp_server=True)