Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -3,8 +3,10 @@ import gradio as gr
|
|
3 |
import spaces
|
4 |
import torch
|
5 |
import os
|
|
|
6 |
import gc
|
7 |
import librosa
|
|
|
8 |
from PIL import Image, ImageSequence
|
9 |
from decord import VideoReader, cpu
|
10 |
from moviepy.editor import VideoFileClip
|
@@ -64,19 +66,22 @@ def frames_from_video(path):
|
|
64 |
|
65 |
def audio_from_video(path):
|
66 |
clip = VideoFileClip(path)
|
67 |
-
|
|
|
|
|
|
|
68 |
clip.close()
|
69 |
-
return
|
70 |
|
71 |
def load_audio(path):
    """Decode the audio file at ``path`` into a single-channel waveform.

    The file is loaded through ``librosa.load`` with ``sr = AUDIO_SR`` and
    ``mono = True``, i.e. resampled to the module-wide rate and mixed down
    to one channel.

    Args:
        path: Location of the audio file to read.

    Returns:
        The waveform returned by ``librosa.load``; the sample rate that
        accompanies it is dropped because it is always ``AUDIO_SR``.
    """
    loaded = librosa.load(path, sr = AUDIO_SR, mono = True)
    waveform = loaded[0]
    return waveform
|
74 |
|
75 |
def build_video_omni(path, prefix, instruction):
|
76 |
-
frames
|
77 |
-
audio
|
78 |
contents = [prefix + instruction]
|
79 |
-
total
|
80 |
for i in range(total):
|
81 |
frame = frames[i] if i < len(frames) else frames[-1]
|
82 |
chunk = audio[AUDIO_SR * i : AUDIO_SR * (i + 1)]
|
|
|
3 |
import spaces
|
4 |
import torch
|
5 |
import os
|
6 |
+
import math
|
7 |
import gc
|
8 |
import librosa
|
9 |
+
import tempfile
|
10 |
from PIL import Image, ImageSequence
|
11 |
from decord import VideoReader, cpu
|
12 |
from moviepy.editor import VideoFileClip
|
|
|
66 |
|
67 |
def audio_from_video(path):
    """Extract the audio track of the video at ``path`` as a mono waveform.

    The track is written to a temporary 16-bit PCM WAV file at ``AUDIO_SR``
    and read back with ``librosa.load`` (``sr = AUDIO_SR``, ``mono = True``).

    Args:
        path: Location of the video file.

    Returns:
        The waveform sampled at ``AUDIO_SR``. A video without an audio
        stream yields an empty float32 array, so ``len()`` and slicing on
        the result keep working for callers.
    """
    # Local import: only the no-audio fallback needs numpy directly.
    import numpy as np

    clip = VideoFileClip(path)
    try:
        if clip.audio is None:
            # MoviePy sets ``audio`` to None when the container has no audio
            # stream; without this guard the call below raises AttributeError.
            return np.zeros(0, dtype = np.float32)

        # Use a private directory rather than an open NamedTemporaryFile:
        # re-opening such a file by name while it is still open is
        # platform-dependent (works on Unix, fails on Windows).
        with tempfile.TemporaryDirectory() as tmp_dir:
            wav_path = os.path.join(tmp_dir, "audio.wav")
            clip.audio.write_audiofile(wav_path, codec = "pcm_s16le",
                                       fps = AUDIO_SR, verbose = False, logger = None)
            audio_np, _ = librosa.load(wav_path, sr = AUDIO_SR, mono = True)
    finally:
        # Release the clip's readers even if extraction or decoding fails.
        clip.close()
    return audio_np
|
75 |
|
76 |
def load_audio(path):
    """Decode the audio file at ``path`` into a single-channel waveform.

    The file is loaded through ``librosa.load`` with ``sr = AUDIO_SR`` and
    ``mono = True``, i.e. resampled to the module-wide rate and mixed down
    to one channel.

    Args:
        path: Location of the audio file to read.

    Returns:
        The waveform returned by ``librosa.load``; the sample rate that
        accompanies it is dropped because it is always ``AUDIO_SR``.
    """
    loaded = librosa.load(path, sr = AUDIO_SR, mono = True)
    waveform = loaded[0]
    return waveform
|
79 |
|
80 |
def build_video_omni(path, prefix, instruction):
|
81 |
+
frames = frames_from_video(path)
|
82 |
+
audio = audio_from_video(path)
|
83 |
contents = [prefix + instruction]
|
84 |
+
total = max(len(frames), math.ceil(len(audio) / AUDIO_SR))
|
85 |
for i in range(total):
|
86 |
frame = frames[i] if i < len(frames) else frames[-1]
|
87 |
chunk = audio[AUDIO_SR * i : AUDIO_SR * (i + 1)]
|