Spaces:

kirbah
/

mcp-youtube-transcript

Running

App Files Files Community

kirbah committed 5 days ago

Commit

4955f2d

1 Parent(s): 666750d

Better descriptions

Browse files

Files changed (3) hide show

app via API.py +239 -0
app.py +154 -57
requirements.txt +2 -1

app via API.py ADDED Viewed

	@@ -0,0 +1,239 @@

+import re
+import gradio as gr
+from googleapiclient.discovery import build
+from googleapiclient.errors import HttpError
+# Re-used function to extract video ID
+def _extract_video_id(youtube_url: str) -> str | None:
+    """
+    Extracts the YouTube video ID from a URL or a bare ID.
+    Supported inputs:
+        - Watch URLs with ``v`` anywhere in the query string (``watch?v=ID``, ``watch?feature=share&v=ID``).
+        - Short links (``youtu.be/ID``).
+        - Path-style URLs (``youtube.com/embed/ID``, ``youtube.com/shorts/ID``, ``youtube.com/live/ID``).
+        - A bare 11-character video ID.
+    Args:
+        youtube_url (str): URL or video ID. Surrounding whitespace is ignored.
+    Returns:
+        str | None: The extracted ID, or None when the input is empty or nothing matches.
+    """
+    if not youtube_url:
+        return None
+    candidate = youtube_url.strip()
+    # Each capture stops at '&', '#' (and '?' or '/' for path-style URLs) so that
+    # trailing parameters or fragments such as '#t=30' never end up in the ID.
+    url_patterns = (
+        # 'v' may be preceded by other query parameters.
+        r"watch\?(?:[^#]*?&)?v=([^&#]+)",
+        r"youtu\.be/([^?&#/]+)",
+        r"youtube\.com/(?:embed|shorts|live)/([^?&#/]+)",
+    )
+    for pattern in url_patterns:
+        match = re.search(pattern, candidate)
+        if match:
+            return match.group(1)
+    # Video ID directly passed
+    if re.fullmatch(r"[a-zA-Z0-9_-]{11}", candidate):
+        return candidate
+    return None
+def _parse_srt_to_text(srt_content: str | bytes) -> str:
+    """
+    Parses SRT formatted content to extract plain text.
+    Removes cue sequence numbers, timestamp lines, and basic HTML formatting, then joins
+    the remaining caption lines with single spaces.
+    Args:
+        srt_content (str | bytes): The SRT document. Bytes are decoded as UTF-8 (undecodable
+                                   bytes are replaced) so a raw HTTP response body can be passed directly.
+    Returns:
+        str: The caption text on one line, or an empty string when there is no caption text.
+    """
+    if not srt_content:
+        return ""
+    if isinstance(srt_content, (bytes, bytearray)):
+        srt_content = bytes(srt_content).decode('utf-8', errors='replace')
+    # A leading byte-order mark would otherwise stop the first sequence number from looking like digits.
+    lines = [line.strip() for line in srt_content.lstrip('\ufeff').splitlines()]
+    text_lines = []
+    for index, line in enumerate(lines):
+        if not line or '-->' in line:
+            continue
+        following = lines[index + 1] if index + 1 < len(lines) else ''
+        # A digit-only line is a sequence number only when a timestamp line follows it;
+        # otherwise it is caption text (e.g. "42") and must be kept.
+        if line.isdigit() and '-->' in following:
+            continue
+        line_text = re.sub(r'<[^>]+>', '', line).strip()
+        # Lines made only of tags become empty; skip them to avoid double spaces in the output.
+        if line_text:
+            text_lines.append(line_text)
+    return " ".join(text_lines)
+def get_youtube_transcript_official_api(video_url_or_id: str, api_key: str, target_language: str = 'en') -> str:
+    """
+    Retrieves the transcript for a YouTube video using the official YouTube Data API v3.
+    This function is intended to be exposed as an MCP tool.
+    The caption track whose language equals ``target_language`` (case-insensitive) is preferred. When it is
+    missing and the target is not English, an 'en' track is used instead. The chosen track is downloaded as
+    SRT and flattened to plain text.
+    NOTE(review): the YouTube Data API reference describes captions.download as requiring OAuth 2.0
+    authorization; confirm that an API key alone is accepted for the videos this tool targets.
+    Args:
+        video_url_or_id (str): YouTube video URL or 11-character video ID.
+        api_key (str): Your YouTube Data API v3 key.
+        target_language (str): Preferred language code for the transcript (e.g., 'en', 'es'). Defaults to 'en'.
+    Returns:
+        str: The concatenated transcript text or an error message. API failures are reported in the
+             returned string rather than raised.
+    """
+    video_id = _extract_video_id(video_url_or_id)
+    if not video_id:
+        return f"Error: Invalid YouTube video URL or ID: '{video_url_or_id}'. Could not extract a valid video ID."
+    if not api_key or not api_key.strip():
+        return "Error: YouTube Data API Key is missing. Please provide a valid API key for the 'api_key' argument."
+    # Normalize once so None or blank input falls back to English instead of crashing on .lower().
+    wanted_language = (target_language or 'en').strip().lower() or 'en'
+    try:
+        # Pasted keys often carry stray whitespace, which the API would reject.
+        youtube = build('youtube', 'v3', developerKey=api_key.strip())
+    except Exception as e:
+        return f"Error: Could not build YouTube API client. Check library installation. Details: {str(e)}"
+    try:
+        caption_response = youtube.captions().list(
+            part="snippet",
+            videoId=video_id
+        ).execute()
+        caption_tracks = caption_response.get('items', [])
+        caption_id_to_download = None
+        found_lang_for_download = None
+        # Human-readable track list, only shown when no suitable track is found.
+        available_langs_details = []
+        for item in caption_tracks:
+            lang_code = item['snippet']['language']
+            lang_name = item['snippet'].get('name', 'N/A')
+            track_kind = item['snippet'].get('trackKind', 'N/A')
+            available_langs_details.append(
+                f"{lang_code} (Name: '{lang_name}', Type: {track_kind})")
+            if lang_code.lower() == wanted_language:
+                caption_id_to_download = item['id']
+                found_lang_for_download = lang_code
+                break
+        # English fallback, skipped when English was already the requested language.
+        if not caption_id_to_download and wanted_language != 'en':
+            for item in caption_tracks:
+                lang_code = item['snippet']['language']
+                if lang_code.lower() == 'en':
+                    caption_id_to_download = item['id']
+                    found_lang_for_download = lang_code
+                    break
+        if not caption_id_to_download:
+            available_langs_str = "\n - ".join(
+                available_langs_details) if available_langs_details else "None listed (captions might be disabled, non-existent, or API access restricted)"
+            return (f"Error: No suitable caption track found for language '{wanted_language}' "
+                    f"(or 'en' fallback) for video ID '{video_id}'.\n"
+                    f"Available caption tracks:\n - {available_langs_str}")
+        srt_transcript = youtube.captions().download(
+            id=caption_id_to_download,
+            tfmt='srt'
+        ).execute()
+        # The download body may be handed back as raw bytes; decode before text parsing.
+        if isinstance(srt_transcript, (bytes, bytearray)):
+            srt_transcript = bytes(srt_transcript).decode('utf-8', errors='replace')
+        plain_text_transcript = _parse_srt_to_text(srt_transcript)
+        if not plain_text_transcript.strip():
+            return (f"Notice: Transcript for video ID '{video_id}' (Language: {found_lang_for_download}) "
+                    "was downloaded but appears empty after parsing. The SRT file might be malformed or contain no text.")
+        return plain_text_transcript
+    except HttpError as e:
+        error_content_bytes = e.content
+        error_details = "No additional details in error content."
+        if error_content_bytes:
+            try:
+                error_details = error_content_bytes.decode('utf-8')
+            except UnicodeDecodeError:
+                error_details = "Error content could not be decoded (non-UTF-8)."
+        status_code = e.resp.status
+        # Compare against lowercase needles: the haystack is lowercased, so a mixed-case
+        # needle such as "quotaExceeded" could never match.
+        details_lower = error_details.lower()
+        if status_code == 401:
+            return (f"API Error (401): Unauthorized. The request for video_id='{video_id}' was rejected; "
+                    f"the caption endpoints may require OAuth 2.0 credentials rather than an API key. Details: {error_details}")
+        if status_code == 403:
+            if "quotaexceeded" in details_lower or "daily limit exceeded" in details_lower:
+                return f"API Error (403): YouTube API quota exceeded. Details: {error_details}"
+            return (f"API Error (403): Forbidden. Check API Key ('api_key'), YouTube Data API v3 enablement, or video owner restrictions for video_id='{video_id}'. Details: {error_details}")
+        if status_code == 404:
+            return (f"API Error (404): Not Found. Video ID '{video_id}' ('video_url_or_id') might be incorrect, private/deleted, or caption track missing. Details: {error_details}")
+        return f"API Error ({status_code}): An API error occurred while processing video_id='{video_id}'. Details: {error_details}"
+    except Exception as e:
+        # Last-resort boundary: the tool contract is to return a message, never to raise.
+        return f"Unexpected Error processing video_id='{video_id}': {type(e).__name__} - {str(e)}"
+def gradio_interface_handler(video_url_or_id: str, api_key: str, language: str) -> str:
+    """
+    Fetches the transcript of a YouTube video through the YouTube Data API v3.
+    Validates the inputs, normalizes the language code, and delegates to
+    get_youtube_transcript_official_api.
+    Args:
+        video_url_or_id (str): The YouTube video URL or its 11-character ID.
+        api_key (str): The YouTube Data API v3 key.
+        language (str): The preferred ISO 639-1 language code for the transcript (e.g., 'en', 'es'). Defaults to 'en' when empty.
+    Returns:
+        str: The fetched transcript or an error message.
+    """
+    # Arguments can arrive as None when a client omits them, so test for falsiness
+    # before calling .strip().
+    if not video_url_or_id or not video_url_or_id.strip():
+        return "Error: YouTube Video URL or ID ('video_url_or_id') input is empty. Please provide a valid URL or ID."
+    if not api_key or not api_key.strip():
+        return "Error: YouTube API Key ('api_key') input is empty. Please provide your API key."
+    language_to_use = (language or '').strip().lower() or 'en'
+    return get_youtube_transcript_official_api(video_url_or_id, api_key, language_to_use)
+# Gradio input components, in the positional order expected by 'gradio_interface_handler'.
+# 'label' is the UI caption and 'placeholder' is the hint shown in an empty field.
+# The MCP tool argument descriptions come from the docstring of 'gradio_interface_handler'.
+inputs = [
+    gr.Textbox(
+        label="YouTube Video URL or ID",
+        placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ or dQw4w9WgXcQ"
+    ),
+    gr.Textbox(
+        label="YouTube Data API Key",
+        type="password",  # Mask the key while it is typed
+        placeholder="Enter your API key (e.g., AIzaSy...)"
+    ),
+    gr.Textbox(
+        label="Preferred Language Code",
+        value="en",  # Default language
+        placeholder="e.g., en, es, fr, de"
+    )
+]
+# Gradio output component: a multi-line box holding the transcript or the error message.
+outputs = gr.Textbox(
+    label="Transcript Output",
+    lines=15,
+    show_copy_button=True
+)
+# Build the Gradio interface; it is launched only when this file is run as a script.
+demo = gr.Interface(
+    fn=gradio_interface_handler,  # The function to wrap, with type hints and docstrings
+    inputs=inputs,
+    outputs=outputs,
+    title="YouTube Video Transcript Retriever (MCP Enabled)",
+    description=(  # Main description shown at the top of the Gradio UI
+        "Enter a YouTube video URL/ID, your YouTube Data API Key, and a preferred language code "
+        "to fetch the video transcript. This interface also exposes an MCP tool for programmatic access. "
+        "The MCP tool's argument descriptions are generated from the function's docstring."
+    ),
+    # 'flagging_mode' replaces the deprecated 'allow_flagging' argument in Gradio 5.
+    flagging_mode='never',
+    examples=[
+        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ", "YOUR_API_KEY_HERE", "en"],
+        ["Mdcw3_s2T_s", "YOUR_API_KEY_HERE", "en"],
+        ["https://www.youtube.com/watch?v=rokGy0huYEA", "YOUR_API_KEY_HERE", "ja"]
+    ],
+    article=(
+        "**Using the Web Interface:**\n"
+        "1. Obtain a [YouTube Data API v3 key](https://developers.google.com/youtube/v3/getting-started).\n"
+        "2. Ensure the YouTube Data API v3 is enabled for your project in Google Cloud Console.\n"
+        "3. Paste the video URL/ID, your API key, and desired language code into the respective fields.\n"
+        "4. Click 'Submit' to retrieve the transcript.\n\n"
+        "**MCP Server Information:**\n"
+        "When launched with `mcp_server=True`, Gradio also starts an MCP server.\n"
+        "- The tool schema (including argument descriptions from the function's docstring) can typically be found at `/gradio_api/mcp/schema`.\n"
+        "- The MCP server endpoint is usually at `/gradio_api/mcp/sse`.\n"
+        "This allows AI models and other MCP clients to use the transcript retrieval functionality programmatically."
+    )
+)
+if __name__ == '__main__':
+    print("Gradio app starting...")
+    print("MCP Server integration is enabled via mcp_server=True.")
+    print(
+        "Ensure 'gradio[mcp]' is installed if you encounter issues related to MCP.")
+    demo.launch(mcp_server=True)

app.py CHANGED Viewed

@@ -25,109 +25,206 @@ def _extract_video_id(youtube_url: str) -> str | None:
         return match.group(1)
     # Video ID directly passed
-    # Basic check for a valid video ID format
     if re.fullmatch(r"^[a-zA-Z0-9_-]{11}$", youtube_url):
         return youtube_url
     return None
-def get_youtube_video_transcript(video_url_or_id: str, lang_preference: list[str] = ['en', 'en-US', 'en-GB']) -> str:
     """
-    Retrieves the transcript for a given YouTube video URL or video ID.
-    It tries to fetch the transcript in the preferred languages first (defaulting to English).
     Args:
         video_url_or_id (str): The full YouTube video URL (e.g., "https://www.youtube.com/watch?v=VIDEO_ID")
                                or just the 11-character video ID.
-        lang_preference (list[str]): A list of language codes to try for the transcript, in order of preference.
-                                     Defaults to ['en', 'en-US', 'en-GB'].
     Returns:
         str: The concatenated transcript text if successful.
-             An error message string if the transcript cannot be fetched (e.g., disabled, not found, invalid ID).
     """
     video_id = _extract_video_id(video_url_or_id)
     if not video_id:
-        return f"Error: Invalid YouTube video URL or ID provided: '{video_url_or_id}'. Could not extract a valid video ID."
     try:
-        # Fetch available transcripts
-        transcript_list = YouTubeTranscriptApi().list(video_id)
-        # Try to find transcript in preferred languages
-        transcript = None
-        for lang_code in lang_preference:
             try:
-                transcript = transcript_list.find_transcript([lang_code])
                 break
             except NoTranscriptFound:
                 continue
-        # If not found in preferred, try generated transcript in preferred languages
-        if not transcript:
-            for lang_code in lang_preference:
                 try:
-                    transcript = transcript_list.find_generated_transcript([
-                                                                           lang_code])
                     break
                 except NoTranscriptFound:
                     continue
-        # If still not found, try any available English transcript
-        if not transcript:
-            try:
-                transcript = transcript_list.find_transcript(
-                    ['en', 'en-US', 'en-GB', 'en-AU', 'en-CA', 'en-IN'])
-            except NoTranscriptFound:
-                pass  # Continue to try any generated English transcript
-        if not transcript:
             try:
-                transcript = transcript_list.find_generated_transcript(
-                    ['en', 'en-US', 'en-GB', 'en-AU', 'en-CA', 'en-IN'])
             except NoTranscriptFound:
-                # If no English transcript, grab the first available original language transcript
                 try:
-                    print(
-                        f"YouTubeTool: No English transcript found for {video_id}. Trying first available original language.")
-                    original_lang_transcript = next(
-                        iter(transcript_list))  # get the first one
-                    transcript = original_lang_transcript
-                except StopIteration:  # No transcripts at all
-                    return f"Error: No transcripts at all seem to be available for video ID '{video_id}'."
-                except NoTranscriptFound:  # Should be caught by StopIteration if list is empty
-                    return f"Error: No transcripts found for video ID '{video_id}' after trying preferred and English languages."
-        if transcript:
-            full_transcript_data = transcript.fetch()
-            # Concatenate all text segments into a single string
             transcript_text = " ".join([segment.text
                                        for segment in full_transcript_data])
             return transcript_text
         else:
-            # This case should ideally be covered by the fallbacks above
-            return f"Error: Could not find a suitable transcript for video ID '{video_id}' in languages: {lang_preference} or English."
     except TranscriptsDisabled:
         return f"Error: Transcripts are disabled for video ID '{video_id}'."
-    # This might catch cases where video ID is valid but has zero transcripts at all.
-    except NoTranscriptFound:
-        return f"Error: No transcripts whatsoever could be found for video ID '{video_id}'. The video might not have any captions or transcripts."
     except Exception as e:
-        # Catch any other unexpected errors from the API or video ID issues not caught by regex
         error_type = type(e).__name__
-        # Check for common youtube_transcript_api specific errors not explicitly caught if any
-        # Heuristic for bad ID
         if "video ID" in str(e).lower() or "parameter" in str(e).lower():
-            return f"Error: Could not retrieve transcript for video ID '{video_id}'. It might be an invalid ID or the video is private/deleted. (API Error: {error_type})"
         return f"Error: An unexpected error occurred while fetching transcript for video ID '{video_id}': {error_type} - {str(e)}"
-def greet(name):
-    return get_youtube_video_transcript(name)
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch(mcp_server=True)

         return match.group(1)
     # Video ID directly passed
     if re.fullmatch(r"^[a-zA-Z0-9_-]{11}$", youtube_url):
         return youtube_url
     return None
+def get_youtube_video_transcript_scraper(video_url_or_id: str, lang_preference_list: list[str]) -> str:
     """
+    Retrieves the transcript for a given YouTube video URL or video ID using your specified youtube_transcript_api methods.
+    It tries to fetch the transcript in the preferred languages first.
     Args:
         video_url_or_id (str): The full YouTube video URL (e.g., "https://www.youtube.com/watch?v=VIDEO_ID")
                                or just the 11-character video ID.
+        lang_preference_list (list[str]): A list of language codes to try for the transcript, in order of preference.
+                                     Example: ['en', 'en-US', 'es'].
     Returns:
         str: The concatenated transcript text if successful.
+             An error message string if the transcript cannot be fetched.
     """
     video_id = _extract_video_id(video_url_or_id)
     if not video_id:
+        return f"Error: Invalid YouTube video URL or ID: '{video_url_or_id}'. Could not extract a valid video ID."
+    if not lang_preference_list:
+        return "Error: Language preference list ('lang_preference_list') cannot be empty."
     try:
+        # List the transcripts available for this video.
+        api = YouTubeTranscriptApi()
+        transcript_list_obj = api.list(
+            video_id)  # iterated directly in the last-resort step further down
+        transcript_found = None
+        # First pass: find_transcript() for each preferred language, in the caller's order.
+        # NOTE(review): the library presumably prefers manually created transcripts here but may
+        # also return generated ones - confirm, since that would make the second pass redundant.
+        for lang_code in lang_preference_list:
             try:
+                # One code per call so the caller's ordering decides the priority.
+                transcript_found = transcript_list_obj.find_transcript([
+                                                                       lang_code])
                 break
             except NoTranscriptFound:
                 continue
+        # If not found, try generated transcript in preferred languages
+        if not transcript_found:
+            for lang_code in lang_preference_list:
                 try:
+                    # Same per-language lookup, this time via find_generated_transcript().
+                    transcript_found = transcript_list_obj.find_generated_transcript([
+                                                                                     lang_code])
                     break
                 except NoTranscriptFound:
                     continue
+        # English fallback: attempted only when no preferred code starts with 'en'.
+        # NOTE(review): a preference such as ['en-US'] therefore skips the broader English list below.
+        english_fallbacks = ['en', 'en-US', 'en-GB', 'en-AU', 'en-CA', 'en-IN']
+        already_tried_english = any(lang.lower().startswith(
+            'en') for lang in lang_preference_list)
+        if not transcript_found and not already_tried_english:
+            # Try any available English transcript (manual first)
             try:
+                transcript_found = transcript_list_obj.find_transcript(
+                    english_fallbacks)
             except NoTranscriptFound:
+                # Then try generated English
                 try:
+                    transcript_found = transcript_list_obj.find_generated_transcript(
+                        english_fallbacks)
+                except NoTranscriptFound:
+                    pass  # No English transcript found
+        # Last resort: take the first transcript the list yields, whatever its language.
+        if not transcript_found:
+            try:
+                print(f"Notice: No transcript found in preferred languages or English for video ID '{video_id}'. "
+                      "Attempting to fetch the first available original language transcript.")
+                # Iterating the list object yields transcript objects; only the first one is used.
+                for tr in transcript_list_obj:
+                    transcript_found = tr  # Get the first one and break
+                    break
+                if not transcript_found:  # The loop body never ran: the list is empty
+                    raise StopIteration  # Handled by the except clause just below
+            except StopIteration:  # No transcripts at all
+                return (f"Error: No transcripts at all seem to be available for video ID '{video_id}'. "
+                        f"Checked preferred: {lang_preference_list}, English fallbacks, and any original language.")
+            # NOTE(review): nothing in this try block appears to raise NoTranscriptFound - confirm whether this handler is reachable.
+            except NoTranscriptFound:
+                return (f"Error: No transcripts found for video ID '{video_id}' after trying preferred, English, and original languages.")
+        if transcript_found:  # fetch() yields segments exposing a .text attribute
+            full_transcript_data = transcript_found.fetch()
             transcript_text = " ".join([segment.text
                                         for segment in full_transcript_data])
             return transcript_text
         else:
+            return (f"Error: Could not find any suitable transcript for video ID '{video_id}'. "
+                    f"Preferred languages: {lang_preference_list}. Also checked English and original languages if applicable.")
     except TranscriptsDisabled:
         return f"Error: Transcripts are disabled for video ID '{video_id}'."
+    except NoTranscriptFound:  # Presumably raised by api.list() when the video has no captions at all - TODO confirm
+        return f"Error: No transcripts whatsoever found for video ID '{video_id}'. The video might not have any captions initially."
     except Exception as e:
         error_type = type(e).__name__
+        # Recognize the library's VideoUnavailable error by class name.
+        if "VideoUnavailable" in error_type:  # Common error from the library
+            return f"Error: Video '{video_id}' is unavailable. It might be private, deleted, or geo-restricted."
+        # Heuristic: the exception text mentions the video ID or a parameter.
         if "video ID" in str(e).lower() or "parameter" in str(e).lower():
+            return f"Error: Could not retrieve transcript for video ID '{video_id}'. It might be an invalid ID or other parameter issue. (API Error: {error_type})"
         return f"Error: An unexpected error occurred while fetching transcript for video ID '{video_id}': {error_type} - {str(e)}"
+def gradio_mcp_handler(video_url_or_id: str, lang_preference_str: str) -> str:
+    """
+    MCP tool handler to retrieve YouTube video transcript using youtube_transcript_api.
+    Args:
+        video_url_or_id (str): The YouTube video URL or its 11-character ID.
+        lang_preference_str (str): A comma-separated string of preferred language codes for the transcript
+                                   (e.g., "en,en-US,es"). Defaults to "en" if empty or invalid.
+    Returns:
+        str: The fetched transcript or an error message.
+    """
+    # The argument can arrive as None when a client omits it, so test for falsiness
+    # before calling .strip().
+    if not video_url_or_id or not video_url_or_id.strip():
+        return "Error: 'video_url_or_id' argument cannot be empty."
+    # Split on commas, drop blank entries, and fall back to English when nothing
+    # usable remains (None, "", "   " or ",").
+    lang_list = [lang.strip()
+                 for lang in (lang_preference_str or "").split(',') if lang.strip()] or ['en']
+    return get_youtube_video_transcript_scraper(video_url_or_id, lang_list)
+# Gradio input components, in the positional order expected by 'gradio_mcp_handler'.
+inputs = [
+    gr.Textbox(
+        label="YouTube Video URL or ID",
+        placeholder="e.g., https://www.youtube.com/watch?v=VIDEO_ID or VIDEO_ID"
+    ),
+    gr.Textbox(
+        label="Preferred Language Codes (comma-separated)",
+        value="en,en-US",
+        placeholder="e.g., en,es,fr (default: en)"
+    )
+]
+# Output component: a multi-line box holding the transcript or the error message.
+outputs = gr.Textbox(
+    label="Transcript Output",
+    lines=15,
+    show_copy_button=True
+)
+# Build the Gradio interface; it is launched only when this file is run as a script.
+demo = gr.Interface(
+    fn=gradio_mcp_handler,
+    inputs=inputs,
+    outputs=outputs,
+    title="YouTube Transcript Retriever (youtube-transcript-api)",
+    description=(
+        "Enter YouTube video URL/ID and comma-separated language codes to fetch transcript using 'youtube-transcript-api'. "
+        "MCP argument descriptions from handler's docstring."
+    ),
+    # 'flagging_mode' replaces the deprecated 'allow_flagging' argument in Gradio 5.
+    flagging_mode='never',
+    examples=[
+        ["https://www.youtube.com/watch?v=Sd6F2pfKJmk", "en"],
+        ["Sd6F2pfKJmk", "en,ja"],
+        ["https://www.youtube.com/watch?v=rokGy0huYEA", "ja,en"]
+    ],
+    article=(
+        "**How to Use:**\n"
+        "1. Paste YouTube video URL or 11-character video ID.\n"
+        "2. Enter comma-separated language codes (e.g., `en-GB,en,es`). Defaults to `en` if empty.\n"
+        "3. Click 'Submit'.\n\n"
+        "**MCP Server Information:**\n"
+        "Launched with `mcp_server=True`, exposes an MCP tool.\n"
+        "- Tool arguments `video_url_or_id` (str) and `lang_preference_str` (str) are defined in handler docstring.\n"
+        "- Schema: `/gradio_api/mcp/schema`. Endpoint: `/gradio_api/mcp/sse`."
+    )
+)
+if __name__ == '__main__':
+    print("Gradio app starting with your specified youtube-transcript-api methods...")
+    print("MCP Server integration enabled (mcp_server=True).")
+    print("Ensure 'gradio[mcp]' and 'youtube-transcript-api' are installed.")
+    demo.launch(mcp_server=True)

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 gradio[mcp]
-youtube-transcript-api

 gradio[mcp]
+youtube-transcript-api
+google-api-python-client