Staticaliza committed on
Commit
4cab0f7
·
verified ·
1 Parent(s): b914581

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -24
app.py CHANGED
@@ -1,9 +1,8 @@
1
  # Imports
2
- import os
3
- import sys
4
  import gradio as gr
5
  import spaces
6
  import torch
 
7
  import librosa
8
  from PIL import Image
9
  from decord import VideoReader, cpu
@@ -32,12 +31,19 @@ footer {
32
  }
33
  '''
34
 
35
- def encode_video(video_path):
36
- def uniform_sample(idxs, n):
37
- gap = len(idxs) / n
38
- return [idxs[int(i*gap + gap/2)] for i in range(n)]
 
 
 
 
 
 
39
 
40
- vr = VideoReader(video_path, ctx=cpu(0))
 
41
  fps = round(vr.get_avg_fps())
42
  idxs = list(range(0, len(vr), fps))
43
  if len(idxs) > MAX_FRAMES:
@@ -45,29 +51,43 @@ def encode_video(video_path):
45
  frames = vr.get_batch(idxs).asnumpy()
46
  return [Image.fromarray(f.astype("uint8")) for f in frames]
47
 
 
 
 
 
 
 
 
48
  @spaces.GPU(duration=60)
49
- def generate(input=[], instruction=DEFAULT_INPUT, sampling=False, temperature=0.7, top_p=0.8, top_k=100, repetition_penalty=1.05, max_tokens=512):
50
 
51
  print(input)
52
  print(instruction)
53
 
54
- content = []
55
- if not input:
56
  return "No input provided."
57
 
58
- for path in input:
59
- ext = os.path.splitext(path)[1].lower()
60
- if ext in [".jpg", ".jpeg", ".png", ".bmp", ".gif"]:
61
- img = Image.open(path).convert("RGB")
62
- content.append(img)
63
- elif ext in [".mp4", ".mov", ".avi", ".mkv"]:
64
- frames = encode_video(path)
65
- content.extend(frames)
66
- elif ext in [".wav", ".mp3", ".flac", ".aac"]:
67
- aud, _ = librosa.load(path, sr=16000, mono=True)
68
- content.append(aud)
69
- else:
70
- continue
 
 
 
 
 
 
 
 
71
 
72
  content.append(instruction)
73
  inputs_payload = [{"role": "user", "content": content}]
@@ -95,7 +115,7 @@ def cloud():
95
  # Initialize
96
  with gr.Blocks(css=css) as main:
97
  with gr.Column():
98
- input = gr.File(label="Input", file_count="multiple", file_types=["image", "video", "audio"], type="filepath", allow_reordering=True)
99
  instruction = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Instruction")
100
  sampling = gr.Checkbox(value=False, label="Sampling")
101
  temperature = gr.Slider(minimum=0.01, maximum=1.99, step=0.01, value=0.7, label="Temperature")
 
1
  # Imports
 
 
2
  import gradio as gr
3
  import spaces
4
  import torch
5
+ import os
6
  import librosa
7
  from PIL import Image
8
  from decord import VideoReader, cpu
 
31
  }
32
  '''
33
 
34
+ filetypes = {
35
+ "Image": [".jpg", ".jpeg", ".png", ".bmp"],
36
+ "Gif": [".gif"],
37
+ "Video": [".mp4", ".mov", ".avi", ".mkv"],
38
+ "Audio": [".wav", ".mp3", ".flac", ".aac"],
39
+ }
40
+
41
def uniform_sample(idxs, n):
    """Pick *n* evenly spaced items from *idxs*.

    The sequence is divided into *n* equal-width spans and the item at the
    midpoint of each span is returned, in order.

    Args:
        idxs: Indexable sequence to sample from.
        n: Number of items to pick.

    Returns:
        A list of *n* items, or an empty list when *idxs* is empty or *n* is
        not positive.
    """
    # Guard the degenerate cases: n == 0 would divide by zero below, and an
    # empty sequence has no item to index.
    if n <= 0 or len(idxs) == 0:
        return []
    gap = len(idxs) / n
    # i * gap is the start of span i; adding gap / 2 lands on its midpoint.
    return [idxs[int(i * gap + gap / 2)] for i in range(n)]
44
 
45
+ def encode_video(path):
46
+ vr = VideoReader(path, ctx=cpu(0))
47
  fps = round(vr.get_avg_fps())
48
  idxs = list(range(0, len(vr), fps))
49
  if len(idxs) > MAX_FRAMES:
 
51
  frames = vr.get_batch(idxs).asnumpy()
52
  return [Image.fromarray(f.astype("uint8")) for f in frames]
53
 
54
def encode_gif(path):
    """Decode an animated GIF into a list of RGB PIL images.

    Every frame of the file at *path* is copied and converted to RGB. When
    the GIF has more than MAX_FRAMES frames, the list is reduced to
    MAX_FRAMES entries with uniform_sample.

    Args:
        path: Filesystem path of the GIF to decode.

    Returns:
        A list of RGB ``PIL.Image`` frames, at most MAX_FRAMES long.
    """
    # Imported locally: the file's import block only brings in ``Image`` from
    # PIL, so ``ImageSequence`` would otherwise be an undefined name here.
    from PIL import ImageSequence

    # The context manager closes the underlying file handle once the frames
    # have been copied out of the source image.
    with Image.open(path) as img:
        frames = [frame.copy().convert("RGB") for frame in ImageSequence.Iterator(img)]
    if len(frames) > MAX_FRAMES:
        frames = uniform_sample(frames, MAX_FRAMES)
    return frames
60
+
61
  @spaces.GPU(duration=60)
62
+ def generate(input, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7, top_p=0.8, top_k=100, repetition_penalty=1.05, max_tokens=512):
63
 
64
  print(input)
65
  print(instruction)
66
 
67
+ if not input_file:
 
68
  return "No input provided."
69
 
70
+ extension = os.path.splitext(input_file)[1].lower()
71
+ filetype = None
72
+ for category, extensions in filetypes.items():
73
+ if extension in extensions:
74
+ filetype = category
75
+ break
76
+
77
+ content = []
78
+ if filetype == "Image":
79
+ image = Image.open(input_file).convert("RGB")
80
+ content.append(image)
81
+ elif filetype in ["Video", "Gif"]:
82
+ frames = encode_gif(input_file) if filetype == "Gif" else encode_video(input_file)
83
+ content.extend(frames)
84
+ audio = librosa.load(input_file, sr=16000, mono=True)
85
+ content.append(audio)
86
+ elif filetype == "Audio":
87
+ audio = librosa.load(input_file, sr=16000, mono=True)
88
+ content.append(audio)
89
+ else:
90
+ return "Unsupported file type."
91
 
92
  content.append(instruction)
93
  inputs_payload = [{"role": "user", "content": content}]
 
115
  # Initialize
116
  with gr.Blocks(css=css) as main:
117
  with gr.Column():
118
+ input = gr.File(label="Input", file_types=["image", "video", "audio"], type="filepath")
119
  instruction = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Instruction")
120
  sampling = gr.Checkbox(value=False, label="Sampling")
121
  temperature = gr.Slider(minimum=0.01, maximum=1.99, step=0.01, value=0.7, label="Temperature")