kirbah committed on
Commit
4955f2d
·
1 Parent(s): 666750d

Better descriptions

Browse files
Files changed (3) hide show
  1. app via API.py +239 -0
  2. app.py +154 -57
  3. requirements.txt +2 -1
app via API.py ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import gradio as gr
3
+ from googleapiclient.discovery import build
4
+ from googleapiclient.errors import HttpError
5
+
6
+ # Re-used function to extract video ID
7
+
8
+
9
+ def _extract_video_id(youtube_url: str) -> str | None:
10
+ """
11
+ Extracts the YouTube video ID from a URL.
12
+ Handles standard, shortened, embed URLs, and direct ID.
13
+ """
14
+ # Standard URL: https://www.youtube.com/watch?v=VIDEO_ID
15
+ match = re.search(r"watch\?v=([^&]+)", youtube_url)
16
+ if match:
17
+ return match.group(1)
18
+
19
+ # Shortened URL: https://youtu.be/VIDEO_ID
20
+ match = re.search(r"youtu\.be/([^?&]+)", youtube_url)
21
+ if match:
22
+ return match.group(1)
23
+
24
+ # Embed URL: https://www.youtube.com/embed/VIDEO_ID
25
+ match = re.search(r"youtube\.com/embed/([^?&]+)", youtube_url)
26
+ if match:
27
+ return match.group(1)
28
+
29
+ # Video ID directly passed
30
+ if re.fullmatch(r"^[a-zA-Z0-9_-]{11}$", youtube_url):
31
+ return youtube_url
32
+ return None
33
+
34
+
35
+ def _parse_srt_to_text(srt_content: str) -> str:
36
+ """
37
+ Parses SRT formatted string to extract plain text.
38
+ Removes timestamps, sequence numbers, and basic HTML formatting.
39
+ """
40
+ text_lines = []
41
+ lines = srt_content.splitlines()
42
+ for line in lines:
43
+ if not line.strip() or line.strip().isdigit() or '-->' in line:
44
+ continue
45
+ line_text = re.sub(r'<[^>]+>', '', line)
46
+ text_lines.append(line_text.strip())
47
+ return " ".join(text_lines)
48
+
49
+
50
def get_youtube_transcript_official_api(video_url_or_id: str, api_key: str, target_language: str = 'en') -> str:
    """
    Retrieves the transcript for a YouTube video using the official YouTube Data API v3.
    This function is intended to be exposed as an MCP tool.

    The caption tracks of the video are listed, the track whose language matches
    ``target_language`` (case-insensitively) is chosen, falling back to 'en', and
    that track is downloaded as SRT and reduced to plain text.

    NOTE(review): the YouTube Data API reference describes captions.list and
    captions.download as requiring OAuth 2.0 authorization; a plain API key may be
    rejected with 401/403 - confirm against the API documentation.

    Args:
        video_url_or_id (str): YouTube video URL or 11-character video ID.
        api_key (str): Your YouTube Data API v3 key.
        target_language (str): Preferred language code for the transcript (e.g., 'en', 'es'). Defaults to 'en'.

    Returns:
        str: The concatenated transcript text or an error message. Failures are
        reported through the returned string; no exception is propagated from
        the API calls.
    """
    video_id = _extract_video_id(video_url_or_id)
    if not video_id:
        return f"Error: Invalid YouTube video URL or ID: '{video_url_or_id}'. Could not extract a valid video ID."

    if not api_key or not api_key.strip():
        return "Error: YouTube Data API Key is missing. Please provide a valid API key for the 'api_key' argument."

    # Normalise once so a None / blank / padded language behaves like the default.
    target_language = (target_language or 'en').strip() or 'en'
    wanted_lang = target_language.lower()

    try:
        youtube = build('youtube', 'v3', developerKey=api_key.strip())
    except Exception as e:
        return f"Error: Could not build YouTube API client. Check library installation. Details: {str(e)}"

    try:
        caption_response = youtube.captions().list(
            part="snippet",
            videoId=video_id
        ).execute()

        available_langs_details = []
        tracks_by_lang = {}
        for item in caption_response.get('items', []):
            snippet = item['snippet']
            lang_code = snippet['language']
            lang_name = snippet.get('name', 'N/A')
            track_kind = snippet.get('trackKind', 'N/A')
            available_langs_details.append(
                f"{lang_code} (Name: '{lang_name}', Type: {track_kind})")
            # The first track listed for a language wins.
            tracks_by_lang.setdefault(lang_code.lower(), item)

        # Requested language first, then the English fallback.
        chosen_track = tracks_by_lang.get(wanted_lang) or tracks_by_lang.get('en')

        if not chosen_track:
            available_langs_str = "\n - ".join(
                available_langs_details) if available_langs_details else "None listed (captions might be disabled, non-existent, or API access restricted)"
            return (f"Error: No suitable caption track found for language '{target_language}' "
                    f"(or 'en' fallback) for video ID '{video_id}'.\n"
                    f"Available caption tracks:\n - {available_langs_str}")

        caption_id_to_download = chosen_track['id']
        found_lang_for_download = chosen_track['snippet']['language']

        srt_transcript = youtube.captions().download(
            id=caption_id_to_download,
            tfmt='srt'
        ).execute()
        # The download body is not JSON; depending on the client library
        # version it may come back as bytes rather than str.
        if isinstance(srt_transcript, (bytes, bytearray)):
            srt_transcript = bytes(srt_transcript).decode('utf-8', errors='replace')

        plain_text_transcript = _parse_srt_to_text(srt_transcript)

        if not plain_text_transcript.strip():
            return (f"Notice: Transcript for video ID '{video_id}' (Language: {found_lang_for_download}) "
                    "was downloaded but appears empty after parsing. The SRT file might be malformed or contain no text.")

        return plain_text_transcript

    except HttpError as e:
        error_content_bytes = e.content
        error_details = "No additional details in error content."
        if error_content_bytes:
            try:
                error_details = error_content_bytes.decode('utf-8')
            except UnicodeDecodeError:
                error_details = "Error content could not be decoded (non-UTF-8)."

        status_code = e.resp.status
        # Compare against lower-case markers: the haystack is lower-cased, so a
        # mixed-case needle such as "quotaExceeded" could never match.
        details_lower = error_details.lower()

        if status_code == 401:
            return (f"API Error (401): Unauthorized. The captions endpoints may require OAuth 2.0 "
                    f"authorization rather than an API key for video_id='{video_id}'. Details: {error_details}")
        if status_code == 403:
            if "quotaexceeded" in details_lower or "daily limit exceeded" in details_lower:
                return f"API Error (403): YouTube API quota exceeded. Details: {error_details}"
            return (f"API Error (403): Forbidden. Check API Key ('api_key'), YouTube Data API v3 enablement, or video owner restrictions for video_id='{video_id}'. Details: {error_details}")
        if status_code == 404:
            return (f"API Error (404): Not Found. Video ID '{video_id}' ('video_url_or_id') might be incorrect, private/deleted, or caption track missing. Details: {error_details}")
        return f"API Error ({status_code}): An API error occurred while processing video_id='{video_id}'. Details: {error_details}"

    except Exception as e:
        # Top-level boundary of the tool: report instead of crashing the UI / MCP call.
        return f"Unexpected Error processing video_id='{video_id}': {type(e).__name__} - {str(e)}"
149
+
150
+
151
def gradio_interface_handler(video_url_or_id: str, api_key: str, language: str) -> str:
    """
    Fetch a YouTube video transcript through the official YouTube Data API v3.

    Validates the raw UI / MCP inputs and delegates to
    get_youtube_transcript_official_api. Missing (None), empty or
    whitespace-only values are reported as an error string instead of raising.

    Args:
        video_url_or_id (str): The YouTube video URL or its 11-character ID.
        api_key (str): The YouTube Data API v3 key.
        language (str): The preferred ISO 639-1 language code for the transcript (e.g., 'en', 'es'). Defaults to 'en' when empty.

    Returns:
        str: The fetched transcript or an error message.
    """
    if not video_url_or_id or not video_url_or_id.strip():
        return "Error: YouTube Video URL or ID ('video_url_or_id') input is empty. Please provide a valid URL or ID."
    if not api_key or not api_key.strip():
        return "Error: YouTube API Key ('api_key') input is empty. Please provide your API key."

    # Blank / missing language falls back to English.
    language_to_use = (language or "").strip().lower() or 'en'

    # Forward trimmed values so pasted input with stray whitespace still works.
    return get_youtube_transcript_official_api(
        video_url_or_id.strip(), api_key.strip(), language_to_use)
173
+
174
+
175
+ # Define Gradio input components
176
+ # The 'label' is for the UI, and 'placeholder' provides a hint.
177
+ # The descriptions for the MCP tool arguments are derived from the docstring of 'gradio_interface_handler'.
178
inputs = [
    gr.Textbox(
        label="YouTube Video URL or ID",
        placeholder="e.g., https://www.youtube.com/watch?v=dQw4w9WgXcQ or dQw4w9WgXcQ",
    ),
    gr.Textbox(
        label="YouTube Data API Key",
        type="password",  # keep the key masked in the UI
        placeholder="Enter your API key (e.g., AIzaSy...)",
    ),
    gr.Textbox(
        label="Preferred Language Code",
        value="en",  # Default language
        placeholder="e.g., en, es, fr, de",
    ),
]

# Define Gradio output component
# The 'label' is for the UI. The description for the MCP tool output is derived
# from the return type hint and docstring of 'gradio_interface_handler'.
outputs = gr.Textbox(
    label="Transcript Output",
    lines=15,
    show_copy_button=True,
)

# Create the Gradio interface (launched only when run as a script, see below).
demo = gr.Interface(
    fn=gradio_interface_handler,  # The function to wrap, with type hints and docstrings
    inputs=inputs,
    outputs=outputs,
    title="YouTube Video Transcript Retriever (MCP Enabled)",
    # Main description for the Gradio UI; also provides context for the tool.
    description=(
        "Enter a YouTube video URL/ID, your YouTube Data API Key, and a preferred language code "
        "to fetch the video transcript. This interface also exposes an MCP tool for programmatic access. "
        "The MCP tool's argument descriptions are generated from the function's docstring."
    ),
    # 'flagging_mode' supersedes the deprecated 'allow_flagging' argument.
    flagging_mode='never',
    examples=[
        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ", "YOUR_API_KEY_HERE", "en"],
        ["Mdcw3_s2T_s", "YOUR_API_KEY_HERE", "en"],
        ["https://www.youtube.com/watch?v=rokGy0huYEA", "YOUR_API_KEY_HERE", "ja"],
    ],
    article=(
        "**Using the Web Interface:**\n"
        "1. Obtain a [YouTube Data API v3 key](https://developers.google.com/youtube/v3/getting-started).\n"
        "2. Ensure the YouTube Data API v3 is enabled for your project in Google Cloud Console.\n"
        "3. Paste the video URL/ID, your API key, and desired language code into the respective fields.\n"
        "4. Click 'Submit' to retrieve the transcript.\n\n"
        "**MCP Server Information:**\n"
        "When launched with `mcp_server=True`, Gradio also starts an MCP server.\n"
        "- The tool schema (including argument descriptions from the function's docstring) can typically be found at `/gradio_api/mcp/schema`.\n"
        "- The MCP server endpoint is usually at `/gradio_api/mcp/sse`.\n"
        "This allows AI models and other MCP clients to use the transcript retrieval functionality programmatically."
    ),
)

if __name__ == '__main__':
    print("Gradio app starting...")
    print("MCP Server integration is enabled via mcp_server=True.")
    print(
        "Ensure 'gradio[mcp]' is installed if you encounter issues related to MCP.")
    demo.launch(mcp_server=True)
app.py CHANGED
@@ -25,109 +25,206 @@ def _extract_video_id(youtube_url: str) -> str | None:
25
  return match.group(1)
26
 
27
  # Video ID directly passed
28
- # Basic check for a valid video ID format
29
  if re.fullmatch(r"^[a-zA-Z0-9_-]{11}$", youtube_url):
30
  return youtube_url
31
-
32
  return None
33
 
34
 
35
- def get_youtube_video_transcript(video_url_or_id: str, lang_preference: list[str] = ['en', 'en-US', 'en-GB']) -> str:
36
  """
37
- Retrieves the transcript for a given YouTube video URL or video ID.
38
- It tries to fetch the transcript in the preferred languages first (defaulting to English).
39
 
40
  Args:
41
  video_url_or_id (str): The full YouTube video URL (e.g., "https://www.youtube.com/watch?v=VIDEO_ID")
42
  or just the 11-character video ID.
43
- lang_preference (list[str]): A list of language codes to try for the transcript, in order of preference.
44
- Defaults to ['en', 'en-US', 'en-GB'].
45
 
46
  Returns:
47
  str: The concatenated transcript text if successful.
48
- An error message string if the transcript cannot be fetched (e.g., disabled, not found, invalid ID).
49
  """
50
  video_id = _extract_video_id(video_url_or_id)
51
 
52
  if not video_id:
53
- return f"Error: Invalid YouTube video URL or ID provided: '{video_url_or_id}'. Could not extract a valid video ID."
 
 
 
54
 
55
  try:
56
- # Fetch available transcripts
57
- transcript_list = YouTubeTranscriptApi().list(video_id)
 
 
58
 
59
- # Try to find transcript in preferred languages
60
- transcript = None
61
- for lang_code in lang_preference:
 
62
  try:
63
- transcript = transcript_list.find_transcript([lang_code])
 
 
64
  break
65
  except NoTranscriptFound:
66
  continue
67
 
68
- # If not found in preferred, try generated transcript in preferred languages
69
- if not transcript:
70
- for lang_code in lang_preference:
71
  try:
72
- transcript = transcript_list.find_generated_transcript([
73
- lang_code])
 
74
  break
75
  except NoTranscriptFound:
76
  continue
77
 
78
- # If still not found, try any available English transcript
79
- if not transcript:
80
- try:
81
- transcript = transcript_list.find_transcript(
82
- ['en', 'en-US', 'en-GB', 'en-AU', 'en-CA', 'en-IN'])
83
- except NoTranscriptFound:
84
- pass # Continue to try any generated English transcript
85
 
86
- if not transcript:
 
87
  try:
88
- transcript = transcript_list.find_generated_transcript(
89
- ['en', 'en-US', 'en-GB', 'en-AU', 'en-CA', 'en-IN'])
90
  except NoTranscriptFound:
91
- # If no English transcript, grab the first available original language transcript
92
  try:
93
- print(
94
- f"YouTubeTool: No English transcript found for {video_id}. Trying first available original language.")
95
- original_lang_transcript = next(
96
- iter(transcript_list)) # get the first one
97
- transcript = original_lang_transcript
98
- except StopIteration: # No transcripts at all
99
- return f"Error: No transcripts at all seem to be available for video ID '{video_id}'."
100
- except NoTranscriptFound: # Should be caught by StopIteration if list is empty
101
- return f"Error: No transcripts found for video ID '{video_id}' after trying preferred and English languages."
102
-
103
- if transcript:
104
- full_transcript_data = transcript.fetch()
105
- # Concatenate all text segments into a single string
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  transcript_text = " ".join([segment.text
107
  for segment in full_transcript_data])
108
  return transcript_text
109
  else:
110
- # This case should ideally be covered by the fallbacks above
111
- return f"Error: Could not find a suitable transcript for video ID '{video_id}' in languages: {lang_preference} or English."
112
 
113
  except TranscriptsDisabled:
114
  return f"Error: Transcripts are disabled for video ID '{video_id}'."
115
- # This might catch cases where video ID is valid but has zero transcripts at all.
116
- except NoTranscriptFound:
117
- return f"Error: No transcripts whatsoever could be found for video ID '{video_id}'. The video might not have any captions or transcripts."
118
  except Exception as e:
119
- # Catch any other unexpected errors from the API or video ID issues not caught by regex
120
  error_type = type(e).__name__
121
- # Check for common youtube_transcript_api specific errors not explicitly caught if any
122
- # Heuristic for bad ID
 
 
123
  if "video ID" in str(e).lower() or "parameter" in str(e).lower():
124
- return f"Error: Could not retrieve transcript for video ID '{video_id}'. It might be an invalid ID or the video is private/deleted. (API Error: {error_type})"
125
  return f"Error: An unexpected error occurred while fetching transcript for video ID '{video_id}': {error_type} - {str(e)}"
126
 
127
 
128
- def greet(name):
129
- return get_youtube_video_transcript(name)
 
130
 
 
 
 
 
131
 
132
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
133
- demo.launch(mcp_server=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  return match.group(1)
26
 
27
  # Video ID directly passed
 
28
  if re.fullmatch(r"^[a-zA-Z0-9_-]{11}$", youtube_url):
29
  return youtube_url
 
30
  return None
31
 
32
 
33
def _first_transcript(finder, code_groups):
    """Return the first transcript `finder` yields for any group of language codes, else None."""
    for codes in code_groups:
        try:
            return finder(codes)
        except NoTranscriptFound:
            continue
    return None


def get_youtube_video_transcript_scraper(video_url_or_id: str, lang_preference_list: list[str]) -> str:
    """
    Retrieves the transcript for a given YouTube video URL or video ID using youtube_transcript_api.

    Lookup order:
      1. find_transcript for each preferred language, in order;
      2. find_generated_transcript for each preferred language, in order;
      3. English variants (only when no preferred code starts with 'en');
      4. the first transcript the video offers, whatever its language.

    Args:
        video_url_or_id (str): The full YouTube video URL (e.g., "https://www.youtube.com/watch?v=VIDEO_ID")
                               or just the 11-character video ID.
        lang_preference_list (list[str]): A list of language codes to try for the transcript, in order of preference.
                                          Example: ['en', 'en-US', 'es'].

    Returns:
        str: The concatenated transcript text if successful.
             An error message string if the transcript cannot be fetched.
    """
    video_id = _extract_video_id(video_url_or_id)

    if not video_id:
        return f"Error: Invalid YouTube video URL or ID: '{video_url_or_id}'. Could not extract a valid video ID."

    if not lang_preference_list:
        return "Error: Language preference list ('lang_preference_list') cannot be empty."

    english_fallbacks = ['en', 'en-US', 'en-GB', 'en-AU', 'en-CA', 'en-IN']

    try:
        transcript_list_obj = YouTubeTranscriptApi().list(video_id)

        # Preferred languages are tried one code at a time so that the caller's
        # ordering decides which transcript wins.
        preferred_groups = [[lang_code] for lang_code in lang_preference_list]
        transcript_found = (
            _first_transcript(transcript_list_obj.find_transcript, preferred_groups)
            or _first_transcript(transcript_list_obj.find_generated_transcript, preferred_groups)
        )

        # English fallback, skipped when the caller already asked for an English variant.
        already_tried_english = any(
            lang.lower().startswith('en') for lang in lang_preference_list)
        if not transcript_found and not already_tried_english:
            transcript_found = (
                _first_transcript(transcript_list_obj.find_transcript, [english_fallbacks])
                or _first_transcript(transcript_list_obj.find_generated_transcript, [english_fallbacks])
            )

        # Last resort: whatever transcript the video lists first.
        if not transcript_found:
            print(f"Notice: No transcript found in preferred languages or English for video ID '{video_id}'. "
                  "Attempting to fetch the first available original language transcript.")
            transcript_found = next(iter(transcript_list_obj), None)

        if not transcript_found:
            return (f"Error: No transcripts at all seem to be available for video ID '{video_id}'. "
                    f"Checked preferred: {lang_preference_list}, English fallbacks, and any original language.")

        full_transcript_data = transcript_found.fetch()
        return " ".join(segment.text for segment in full_transcript_data)

    except TranscriptsDisabled:
        return f"Error: Transcripts are disabled for video ID '{video_id}'."
    except NoTranscriptFound:
        return f"Error: No transcripts whatsoever found for video ID '{video_id}'. The video might not have any captions initially."
    except Exception as e:
        # Top-level boundary of the tool: always answer with a message.
        error_type = type(e).__name__
        if "VideoUnavailable" in error_type:
            return f"Error: Video '{video_id}' is unavailable. It might be private, deleted, or geo-restricted."
        # Heuristic for bad IDs / parameters. The message is lower-cased, so the
        # markers must be lower-case too ("video ID" could never match).
        error_text = str(e).lower()
        if "video id" in error_text or "parameter" in error_text:
            return f"Error: Could not retrieve transcript for video ID '{video_id}'. It might be an invalid ID or other parameter issue. (API Error: {error_type})"
        return f"Error: An unexpected error occurred while fetching transcript for video ID '{video_id}': {error_type} - {str(e)}"
151
 
152
 
153
def gradio_mcp_handler(video_url_or_id: str, lang_preference_str: str) -> str:
    """
    MCP tool handler to retrieve YouTube video transcript using youtube_transcript_api.

    Missing (None), empty or whitespace-only input is reported as an error
    string instead of raising. The language string is split on commas, blank
    entries and duplicates are dropped, and the original order is kept.

    Args:
        video_url_or_id (str): The YouTube video URL or its 11-character ID.
        lang_preference_str (str): A comma-separated string of preferred language codes for the transcript
                                   (e.g., "en,en-US,es"). Defaults to "en" if empty or invalid.

    Returns:
        str: The fetched transcript or an error message.
    """
    if not video_url_or_id or not video_url_or_id.strip():
        return "Error: 'video_url_or_id' argument cannot be empty."

    lang_list = []
    for raw_code in (lang_preference_str or "").split(','):
        code = raw_code.strip()
        if code and code not in lang_list:
            lang_list.append(code)

    if not lang_list:  # Covers None, "", "   " and strings such as ","
        lang_list = ['en']

    # Forward the trimmed value so pasted input with stray whitespace still works.
    return get_youtube_video_transcript_scraper(video_url_or_id.strip(), lang_list)
178
+
179
+
180
+ # Define Gradio input components for MCP
181
inputs = [
    gr.Textbox(
        label="YouTube Video URL or ID",
        placeholder="e.g., https://www.youtube.com/watch?v=VIDEO_ID or VIDEO_ID",
    ),
    gr.Textbox(
        label="Preferred Language Codes (comma-separated)",
        value="en,en-US",
        placeholder="e.g., en,es,fr (default: en)",
    ),
]

# Single text area that shows either the transcript or the error message.
outputs = gr.Textbox(
    label="Transcript Output",
    lines=15,
    show_copy_button=True,
)

# Build the interface; it is launched only when the file is run as a script.
demo = gr.Interface(
    fn=gradio_mcp_handler,
    inputs=inputs,
    outputs=outputs,
    title="YouTube Transcript Retriever (youtube-transcript-api)",
    description=(
        "Enter YouTube video URL/ID and comma-separated language codes to fetch transcript using 'youtube-transcript-api'. "
        "MCP argument descriptions from handler's docstring."
    ),
    # 'flagging_mode' supersedes the deprecated 'allow_flagging' argument.
    flagging_mode='never',
    examples=[
        ["https://www.youtube.com/watch?v=Sd6F2pfKJmk", "en"],
        ["Sd6F2pfKJmk", "en,ja"],
        ["https://www.youtube.com/watch?v=rokGy0huYEA", "ja,en"],
    ],
    article=(
        "**How to Use:**\n"
        "1. Paste YouTube video URL or 11-character video ID.\n"
        "2. Enter comma-separated language codes (e.g., `en-GB,en,es`). Defaults to `en` if empty.\n"
        "3. Click 'Submit'.\n\n"
        "**MCP Server Information:**\n"
        "Launched with `mcp_server=True`, exposes an MCP tool.\n"
        "- Tool arguments `video_url_or_id` (str) and `lang_preference_str` (str) are defined in handler docstring.\n"
        "- Schema: `/gradio_api/mcp/schema`. Endpoint: `/gradio_api/mcp/sse`."
    ),
)

if __name__ == '__main__':
    print("Gradio app starting with your specified youtube-transcript-api methods...")
    print("MCP Server integration enabled (mcp_server=True).")
    print("Ensure 'gradio[mcp]' and 'youtube-transcript-api' are installed.")
    demo.launch(mcp_server=True)
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  gradio[mcp]
2
- youtube-transcript-api
 
 
1
  gradio[mcp]
2
+ youtube-transcript-api
3
+ google-api-python-client