# Spaces: Sleeping (stray page-header text captured with this file; kept only as a comment)
# YouTube video analysis tool
import re

import pytube
import requests
from smolagents import Tool

# The transcript package is optional: when it is missing, the name is bound to
# None so later code can detect its absence instead of failing at import time.
try:
    from youtube_transcript_api import YouTubeTranscriptApi
except ImportError:
    print("YouTube Transcript API not installed. Video transcription may be limited.")
    YouTubeTranscriptApi = None
class YoutubeVideoTool(Tool):
    """Tool that reports a YouTube video's metadata and transcript as text.

    Metadata (title, author, length, description) is read through ``pytube``;
    the transcript is read through ``YouTubeTranscriptApi`` when that optional
    import succeeded at module load.
    """

    name = "youtube_video"
    description = "Analyze YouTube videos to answer questions about their content."
    inputs = {
        "video_url": {
            "type": "string",
            "description": "The YouTube video URL",
        }
    }
    output_type = "string"

    def forward(self, video_url: str) -> str:
        """Build a plain-text report for the video at ``video_url``.

        Args:
            video_url: URL of the video (``watch?v=``, ``youtu.be/``,
                ``/embed/``, ``/shorts/`` or ``/live/`` form).

        Returns:
            A multi-line report with title, author, length, the first 500
            characters of the description and the first 2000 characters of
            the transcript. If anything fails, an
            ``"Error analyzing YouTube video ..."`` message is returned
            instead of raising.

        Raises:
            AssertionError: If ``video_url`` is not a string.
        """
        # Explicit raise (not ``assert``) so the check survives ``python -O``;
        # the exception type is kept the same for existing callers.
        if not isinstance(video_url, str):
            raise AssertionError("Video URL must be a string")

        try:
            video_id = self._extract_video_id(video_url)

            # Get video info
            yt = pytube.YouTube(video_url)
            title = yt.title
            author = yt.author
            minutes, seconds = divmod(yt.length, 60)  # yt.length is in seconds
            # A missing description must not discard the rest of the report.
            video_description = yt.description or ""

            transcript_text = self._fetch_transcript(video_id)

            # The leading and trailing empty entries reproduce the blank first
            # and last lines of the original triple-quoted template.
            report_lines = (
                "",
                "YouTube Video Analysis:",
                f"Title: {title}",
                f"Author: {author}",
                f"Length: {minutes} minutes {seconds} seconds",
                "Description: "
                + self._clip(video_description, 500, "... [truncated]"),
                "Transcript Excerpts:",
                self._clip(transcript_text, 2000, "... [transcript truncated]"),
                "",
            )
            return "\n".join(report_lines)
        except Exception as e:
            # Boundary handler: this method reports failures as text.
            print(f"Error analyzing YouTube video: {str(e)}")
            return f"Error analyzing YouTube video {video_url}: {str(e)}"

    @staticmethod
    def _extract_video_id(video_url: str) -> str:
        """Return the video ID embedded in ``video_url``.

        The ``v=`` query parameter is tried first so that links such as
        ``watch?v=ID&feature=youtu.be`` are not mistaken for short links.

        Raises:
            ValueError: If no known URL form matches.
        """
        # Kept local to the method (not a class attribute) so the class body
        # holds only the simple declarative fields.
        patterns = (
            r"[?&]v=([^&#]+)",                     # .../watch?v=ID
            r"youtu\.be/([^/?#&]+)",               # https://youtu.be/ID
            r"/(?:embed|shorts|live)/([^/?#&]+)",  # .../embed/ID, .../shorts/ID
        )
        for pattern in patterns:
            match = re.search(pattern, video_url)
            if match:
                return match.group(1)
        raise ValueError("could not find a video ID in the URL")

    @staticmethod
    def _fetch_transcript(video_id: str) -> str:
        """Return the transcript as ``"<start>s: <text>"`` lines.

        Never raises: an unavailable API or a failed lookup is reported as a
        short explanatory string instead.
        """
        if YouTubeTranscriptApi is None:
            return "YouTube Transcript API not available"
        try:
            if hasattr(YouTubeTranscriptApi, "fetch"):
                # Newer youtube-transcript-api releases use an instance
                # ``fetch()``; ``to_raw_data()`` yields the same list of
                # dicts the legacy classmethod returned.
                entries = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
            else:
                entries = YouTubeTranscriptApi.get_transcript(video_id)
            return "\n".join(
                f"{item['start']:.1f}s: {item['text']}" for item in entries
            )
        except Exception as e:
            # Best effort: a missing transcript should not fail the report.
            return f"Could not retrieve transcript: {str(e)}"

    @staticmethod
    def _clip(text: str, limit: int, marker: str) -> str:
        """Return ``text`` cut to ``limit`` characters, adding ``marker`` only if cut."""
        if len(text) <= limit:
            return text
        return text[:limit] + marker