Staticaliza committed on
Commit
38e087a
·
verified ·
1 Parent(s): 8fa5734

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -3,8 +3,10 @@ import gradio as gr
3
  import spaces
4
  import torch
5
  import os
 
6
  import gc
7
  import librosa
 
8
  from PIL import Image, ImageSequence
9
  from decord import VideoReader, cpu
10
  from moviepy.editor import VideoFileClip
@@ -64,19 +66,22 @@ def frames_from_video(path):
64
 
65
  def audio_from_video(path):
66
  clip = VideoFileClip(path)
67
- wav = clip.audio.to_soundarray(fps = AUDIO_SR)
 
 
 
68
  clip.close()
69
- return librosa.to_mono(wav.T)
70
 
71
  def load_audio(path):
72
  audio_np, _ = librosa.load(path, sr = AUDIO_SR, mono = True)
73
  return audio_np
74
 
75
  def build_video_omni(path, prefix, instruction):
76
- frames = frames_from_video(path)
77
- audio = audio_from_video(path)
78
  contents = [prefix + instruction]
79
- total = max(len(frames), math.ceil(len(audio) / AUDIO_SR))
80
  for i in range(total):
81
  frame = frames[i] if i < len(frames) else frames[-1]
82
  chunk = audio[AUDIO_SR * i : AUDIO_SR * (i + 1)]
 
3
  import spaces
4
  import torch
5
  import os
6
+ import math
7
  import gc
8
  import librosa
9
+ import tempfile
10
  from PIL import Image, ImageSequence
11
  from decord import VideoReader, cpu
12
  from moviepy.editor import VideoFileClip
 
66
 
67
  def audio_from_video(path):
68
  clip = VideoFileClip(path)
69
+ with tempfile.NamedTemporaryFile(suffix = ".wav", delete = True) as tmp:
70
+ clip.audio.write_audiofile(tmp.name, codec = "pcm_s16le",
71
+ fps = AUDIO_SR, verbose = False, logger = None)
72
+ audio_np, _ = librosa.load(tmp.name, sr = AUDIO_SR, mono = True)
73
  clip.close()
74
+ return audio_np
75
 
76
  def load_audio(path):
77
  audio_np, _ = librosa.load(path, sr = AUDIO_SR, mono = True)
78
  return audio_np
79
 
80
  def build_video_omni(path, prefix, instruction):
81
+ frames = frames_from_video(path)
82
+ audio = audio_from_video(path)
83
  contents = [prefix + instruction]
84
+ total = max(len(frames), math.ceil(len(audio) / AUDIO_SR))
85
  for i in range(total):
86
  frame = frames[i] if i < len(frames) else frames[-1]
87
  chunk = audio[AUDIO_SR * i : AUDIO_SR * (i + 1)]