RCaz's picture
check extraction ok
aabba29
raw
history blame
4.12 kB
import yt_dlp
import os
def _export_firefox_cookies(export_cookies_path):
    """Best-effort export of Firefox cookies to a Netscape-format file.

    Args:
        export_cookies_path (str): file the cookies are dumped to.

    Returns:
        str | None: ``export_cookies_path`` when a cookie file exists after
        the attempt, otherwise ``None``.
    """
    os.makedirs(os.path.dirname(export_cookies_path), exist_ok=True)
    ydl_opts_export_cookies = {
        'cookiesfrombrowser': ('firefox', None, None, None),
        'cookiefile': export_cookies_path,
        'quiet': True,
    }
    print(f"Attempting to export cookies from Firefox to {export_cookies_path}...")
    try:
        with yt_dlp.YoutubeDL(ydl_opts_export_cookies) as ydl:
            # A dummy URL is often sufficient for cookie export
            ydl.extract_info("https://www.youtube.com", download=False)
    except (yt_dlp.utils.YoutubeDLError, OSError) as e:
        # The export is optional: a missing browser/profile (which may surface
        # as an OSError such as FileNotFoundError) must not abort the download.
        print(f"Could not export cookies from browser: {e}")
        print("Please ensure a supported browser is installed and logged in, or manually create a 'cookies.txt' file.")
    else:
        print("Cookies exported successfully (if Firefox was installed and logged in).")
    # Only report the file when it is really there, so callers never pass a
    # dangling path to yt-dlp.
    return export_cookies_path if os.path.isfile(export_cookies_path) else None


def _download_stream(url, ydl_opts):
    """Download ``url`` with the given yt-dlp options and return the file path."""
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info_dict = ydl.extract_info(url, download=True)
        return ydl.prepare_filename(info_dict)


def download_video(url):
    """Download the video and audio streams of a YouTube url.

    Firefox cookies are exported first (best effort) and, when available,
    reused for both downloads. Files are written below ``./cache``: the
    lowest-quality mp4 under ``./cache/video`` and the best m4a audio under
    ``./cache/audio``.

    Args:
        url (str): youtube video url

    Returns:
        dict: ``{"video_path": <str>, "audio_path": <str>}`` holding the paths
        of the downloaded video and audio files.

    Raises:
        yt_dlp.utils.DownloadError: if yt-dlp cannot download one of the
            streams.
    """
    # Instantiate the output path (makedirs tolerates an existing directory).
    output_path = './cache'
    os.makedirs(output_path, exist_ok=True)

    # Get cookies; None means "download anonymously".
    cookie_file = _export_firefox_cookies("./cache/exported_cookies.txt")

    # Get video
    ydl_opts_video = {
        'format': 'worst[ext=mp4]',
        'outtmpl': output_path + '/video/' + '%(title)s_video.%(ext)s',
        'quiet': True,
        # Same single-video behaviour as the audio download below.
        'noplaylist': True,
    }
    # Get audio
    audio_opts = {
        'format': 'bestaudio[ext=m4a]',
        'outtmpl': output_path + '/audio/' + '%(title)s.audio.%(ext)s',
        'quiet': False,
        'noplaylist': True,
    }
    if cookie_file is not None:
        ydl_opts_video['cookiefile'] = cookie_file
        audio_opts['cookiefile'] = cookie_file

    print('Downloading video...')
    video_filename = _download_stream(url, ydl_opts_video)

    print('Downloading audio...')
    audio_filename = _download_stream(url, audio_opts)

    return {
        "video_path": video_filename,
        "audio_path": audio_filename,
    }
import cv2
import os
from skimage.metrics import structural_similarity as ssim
def is_significantly_different(img1, img2, threshold=0.4):
    """Tell whether two images differ noticeably, judged by their SSIM score.

    Both images are converted from BGR to grayscale before the structural
    similarity index is computed.

    Args:
        img1 (numpy.ndarray): First image.
        img2 (numpy.ndarray): Second image.
        threshold (float): SSIM value below which the images count as
            significantly different.

    Returns:
        bool: True if the SSIM score is below ``threshold``, False otherwise.
    """
    gray_first, gray_second = (
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) for image in (img1, img2)
    )
    # With full=True ssim returns (score, similarity_map); only the score is used.
    similarity = ssim(gray_first, gray_second, full=True)[0]
    # A lower similarity score means the images are more different.
    return similarity < threshold
def extract_keyframes(video_path, diff_threshold=0.4, output_path='./cache/video/frames'):
    """Extract key frames from a video based on significant differences.

    Each frame is compared with the most recently saved key frame (initially
    the first frame of the video, which itself is not saved). A frame whose
    SSIM score against that reference is below ``diff_threshold`` is written
    to ``output_path`` as ``keyframe_NNNN.jpg`` and becomes the new reference.

    Args:
        video_path (str): Path to the input video file.
        diff_threshold (float): SSIM threshold to determine significant difference.
        output_path (str): Directory to save the extracted key frames. It is
            created, including missing parents, if it does not exist.

    Returns:
        str | None: "success" once the video has been processed, or None when
        the first frame could not be read.
    """
    cap = cv2.VideoCapture(video_path)
    saved_id = 0
    try:
        success, prev_frame = cap.read()
        if not success:
            print("Failed to read video.")
            return None

        # makedirs (not mkdir) so a missing parent such as ./cache/video does
        # not raise FileNotFoundError, and an existing directory is accepted.
        os.makedirs(output_path, exist_ok=True)

        while True:
            success, frame = cap.read()
            if not success:
                break
            if is_significantly_different(prev_frame, frame, threshold=diff_threshold):
                filename = os.path.join(output_path, f"keyframe_{saved_id:04d}.jpg")
                cv2.imwrite(filename, frame)
                # Later frames are compared against the last saved key frame.
                prev_frame = frame
                saved_id += 1
    finally:
        # Release the capture handle on every exit path, including the early
        # return above and any exception raised while processing frames.
        cap.release()

    print(f"Extracted {saved_id} key frames.")
    return "success"
# Example usage: python <this_file> <video_path>
# Guarded so that importing this module does not trigger frame extraction
# (the previous unguarded call referenced an undefined name `video_path`).
if __name__ == "__main__":
    import sys

    if len(sys.argv) != 2:
        sys.exit(f"usage: {sys.argv[0]} <video_path>")
    extract_keyframes(sys.argv[1])