import gradio as gr
import torch
from llava.model.builder import load_pretrained_model
from llava.mm_utils import get_model_name_from_path, process_images, tokenizer_image_token
from llava.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN
from llava.conversation import conv_templates
import copy
from decord import VideoReader, cpu
import numpy as np

title = "# 📸 Instagram Reels Analiz Aracı"
description = """Bu araç, yüklenen Instagram Reels videolarını analiz eder ve içeriği özetler. 
Video hakkında genel bir açıklama yapar ve klipte neler olup bittiğini adım adım anlatır."""

def load_video(video_path, max_frames_num=64, fps=1):
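    # Decode the video with decord and pick frame indices at roughly `fps` frames per second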
    vr = VideoReader(video_path, ctx=cpu(0))
    total_frame_num = len(vr)
    frame_idx = list(range(0, total_frame_num, int(vr.get_avg_fps() / fps)))
    
    # If sampling at the requested fps yields too many frames, fall back to uniform sampling across the clip
    if len(frame_idx) > max_frames_num:
        frame_idx = np.linspace(0, total_frame_num - 1, max_frames_num, dtype=int).tolist()
    
    video_frames = vr.get_batch(frame_idx).asnumpy()
    return video_frames, len(frame_idx)

# Load the model once at startup
pretrained = "lmms-lab/LLaVA-Video-7B-Qwen2"
model_name = "llava_qwen"
device = "cuda" if torch.cuda.is_available() else "cpu"

print("Model yükleniyor...")
tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, torch_dtype="bfloat16", device_map="auto")
model.eval()
print("Model başarıyla yüklendi!")

def analyze_reel(video_path):
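    # Sample frames from the uploaded reel and preprocess them into the model's pixel format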
    video_frames, frame_count = load_video(video_path)
    video = image_processor.preprocess(video_frames, return_tensors="pt")["pixel_values"].to(device).bfloat16()
    
    prompt = f"{DEFAULT_IMAGE_TOKEN}Bu Instagram Reels videosunu analiz et. Önce videonun genel içeriğini özetle, ardından klipte neler olup bittiğini adım adım açıkla. Video {frame_count} kareye bölünmüştür."
    
    # Wrap the instruction in the qwen_1_5 conversation template expected by LLaVA-Video
    conv = copy.deepcopy(conv_templates["qwen_1_5"])
    conv.append_message(conv.roles[0], prompt)
    conv.append_message(conv.roles[1], None)
    prompt = conv.get_prompt()
    
    # Tokenize the prompt; IMAGE_TOKEN_INDEX stands in for the image placeholder token
    input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(device)
    
    # Greedy decoding (do_sample=False) keeps the description deterministic
    with torch.no_grad():
        output = model.generate(
            input_ids,
            images=[video],
            modalities=["video"],
            do_sample=False,
            temperature=0,
            max_new_tokens=1024,
        )
    
    response = tokenizer.batch_decode(output, skip_special_tokens=True)[0].strip()
    return response

def gradio_interface(video_file):
    if video_file is None:
        return "Lütfen bir video dosyası yükleyin."
    return analyze_reel(video_file)

with gr.Blocks() as demo:
    gr.Markdown(title)
    gr.Markdown(description)
    
    # Two-column layout: video upload on the left, analysis text on the right
    with gr.Row():
        video_input = gr.Video(label="Instagram Reels Video")
        output = gr.Textbox(label="Analysis Result", lines=10)
    
    analyze_button = gr.Button("Analyze Reels")
    analyze_button.click(fn=gradio_interface, inputs=video_input, outputs=output)

if __name__ == "__main__":
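    # share=True additionally exposes a temporary public Gradio link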
    demo.launch(share=True)