GaiaAgentEvaluator / tools /youtube_tool.py
davidgturner's picture
- changes for app.py
08e2c16
# YouTube video analysis tool
import requests
import re
import pytube
from smolagents import Tool
# Optional dependency: transcripts are only available when
# youtube_transcript_api is installed. The name is bound to None up front so
# the rest of the module can test for availability; a successful import below
# simply rebinds it to the real class.
YouTubeTranscriptApi = None
try:
    from youtube_transcript_api import YouTubeTranscriptApi
except ImportError:
    print("YouTube Transcript API not installed. Video transcription may be limited.")
class YoutubeVideoTool(Tool):
    """Tool that summarizes a YouTube video as a block of plain text.

    Given a video URL, ``forward`` collects the video's metadata through
    ``pytube`` (title, author, length, description) and, when the optional
    ``YouTubeTranscriptApi`` import succeeded, a timestamped transcript.
    Everything is returned as a single formatted string.
    """

    name = "youtube_video"
    description = "Analyze YouTube videos to answer questions about their content."
    inputs = {
        "video_url": {
            "type": "string",
            "description": "The YouTube video URL",
        }
    }
    output_type = "string"

    @staticmethod
    def _extract_video_id(video_url: str) -> str:
        """Return the video ID embedded in *video_url*.

        Recognizes ``watch?v=<id>`` URLs, ``youtu.be/<id>`` short links and
        the ``/shorts/``, ``/embed/``, ``/live/`` and ``/v/`` path forms.

        Raises:
            ValueError: if none of the known URL shapes matches.
        """
        # The query-parameter form is tried first so that a watch URL carrying
        # "feature=youtu.be" is not mistaken for a short link. The short-link
        # pattern requires the slash after the host for the same reason.
        patterns = (
            r"[?&]v=([^&#]+)",
            r"youtu\.be/([^/?&#]+)",
            r"/(?:shorts|embed|live|v)/([^/?&#]+)",
        )
        for pattern in patterns:
            match = re.search(pattern, video_url)
            if match:
                return match.group(1)
        raise ValueError("could not find a video ID in the URL")

    @staticmethod
    def _fetch_transcript(video_id: str):
        """Return the transcript as a list of dicts with 'start' and 'text' keys.

        Uses the static ``get_transcript`` call when the installed library
        provides it; otherwise falls back to the instance-based ``fetch`` API
        of newer youtube_transcript_api releases and converts the result to
        the same list-of-dicts shape.
        """
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            return YouTubeTranscriptApi.get_transcript(video_id)
        return YouTubeTranscriptApi().fetch(video_id).to_raw_data()

    @staticmethod
    def _clip(text: str, limit: int, marker: str) -> str:
        """Return *text* cut to *limit* characters, adding *marker* only if cut."""
        if len(text) <= limit:
            return text
        return text[:limit] + marker

    def forward(self, video_url: str) -> str:
        """Build a textual report for the video at *video_url*.

        Args:
            video_url: URL of the YouTube video to analyze.

        Returns:
            A multi-line report with title, author, length, description
            (first 500 characters) and transcript (first 2000 characters).
            If anything other than the transcript lookup fails, an
            "Error analyzing YouTube video ..." message is returned instead;
            a transcript failure is reported inside the report itself.

        Raises:
            AssertionError: if *video_url* is not a string.
        """
        # Explicit raise instead of ``assert`` so the check survives ``-O``;
        # the exception type is unchanged for existing callers.
        if not isinstance(video_url, str):
            raise AssertionError("Video URL must be a string")

        try:
            url = video_url.strip()
            video_id = self._extract_video_id(url)

            # Get video info
            yt = pytube.YouTube(url)
            title = yt.title
            author = yt.author
            minutes, seconds = divmod(yt.length, 60)  # length is in seconds
            # The description can be missing; treat that as empty text
            # rather than failing the whole analysis.
            description = yt.description or ""

            # Try to get transcript; a failure here is reported in the
            # output but does not discard the metadata gathered above.
            if YouTubeTranscriptApi is None:
                transcript_text = "YouTube Transcript API not available"
            else:
                try:
                    entries = self._fetch_transcript(video_id)
                    transcript_text = "\n".join(
                        f"{item['start']:.1f}s: {item['text']}" for item in entries
                    )
                except Exception as e:
                    transcript_text = f"Could not retrieve transcript: {str(e)}"

            description_part = self._clip(description, 500, "... [truncated]")
            transcript_part = self._clip(
                transcript_text, 2000, "... [transcript truncated]"
            )

            # Leading and trailing empty items keep the report wrapped in
            # newlines, as before.
            return "\n".join(
                [
                    "",
                    "YouTube Video Analysis:",
                    f"Title: {title}",
                    f"Author: {author}",
                    f"Length: {minutes} minutes {seconds} seconds",
                    f"Description: {description_part}",
                    "Transcript Excerpts:",
                    transcript_part,
                    "",
                ]
            )
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            print(f"Error analyzing YouTube video: {str(e)}")
            return f"Error analyzing YouTube video {video_url}: {str(e)}"