import gradio as gr
import torch
from llava.model.builder import load_pretrained_model
from llava.mm_utils import get_model_name_from_path, process_images, tokenizer_image_token
from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
from llava.conversation import conv_templates
import copy
from decord import VideoReader, cpu
import numpy as np
title = "# 📸 Instagram Reels Analiz Aracı"
description = """Bu araç, yüklenen Instagram Reels videolarını analiz eder ve içeriği özetler.
Video hakkında genel bir açıklama yapar ve klipte neler olup bittiğini adım adım anlatır."""
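
# load_video samples frames with decord at roughly `fps` frames per second;
# if that yields more than max_frames_num frames, it falls back to picking
# max_frames_num frames spaced uniformly across the whole video.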
def load_video(video_path, max_frames_num=64, fps=1):
    vr = VideoReader(video_path, ctx=cpu(0))
    total_frame_num = len(vr)
    frame_idx = list(range(0, total_frame_num, int(vr.get_avg_fps() / fps)))
    if len(frame_idx) > max_frames_num:
        frame_idx = np.linspace(0, total_frame_num - 1, max_frames_num, dtype=int).tolist()
    video_frames = vr.get_batch(frame_idx).asnumpy()
    return video_frames, len(frame_idx)
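
# Example usage (hypothetical file name, assuming a local MP4):
#   frames, n = load_video("reel.mp4", max_frames_num=64, fps=1)
#   "frames" is an (n, H, W, 3) uint8 RGB array; "n" is the sampled frame count.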
# Load the model
pretrained = "lmms-lab/LLaVA-Video-7B-Qwen2"
model_name = "llava_qwen"
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Loading model...")
tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, torch_dtype="bfloat16", device_map="auto")
model.eval()
print("Model loaded successfully!")
def analyze_reel(video_path):
    video_frames, frame_count = load_video(video_path)
    # Preprocess frames into a (frames, channels, H, W) tensor for the vision tower
    video = image_processor.preprocess(video_frames, return_tensors="pt")["pixel_values"].to(device).bfloat16()
    prompt = f"{DEFAULT_IMAGE_TOKEN}\nAnalyze this Instagram Reels video. First summarize the video's overall content, then explain step by step what happens in the clip. The video has been split into {frame_count} frames."
    conv = copy.deepcopy(conv_templates["qwen_1_5"])
    conv.append_message(conv.roles[0], prompt)
    conv.append_message(conv.roles[1], None)
    prompt = conv.get_prompt()
    # tokenizer_image_token swaps the image placeholder for IMAGE_TOKEN_INDEX during tokenization
    input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(device)
    with torch.no_grad():
        output = model.generate(
            input_ids,
            images=[video],
            modalities=["video"],
            do_sample=False,
            temperature=0,
            max_new_tokens=1024,
        )
    response = tokenizer.batch_decode(output, skip_special_tokens=True)[0].strip()
    return response
def gradio_interface(video_file):
    if video_file is None:
        return "Please upload a video file."
    return analyze_reel(video_file)
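
# Gradio UI: a video upload input next to a text box that shows the analysis.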
with gr.Blocks() as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    with gr.Row():
        video_input = gr.Video(label="Instagram Reels Video")
        output = gr.Textbox(label="Analysis Result", lines=10)
    analyze_button = gr.Button("Analyze Reel")
    analyze_button.click(fn=gradio_interface, inputs=video_input, outputs=output)

if __name__ == "__main__":
    demo.launch(share=True)