Spaces:

Staticaliza
/

Sense

Running

App Files Files Community

Staticaliza committed on May 28

Commit

4cab0f7

verified ·

1 Parent(s): b914581

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -24

app.py CHANGED Viewed

@@ -1,9 +1,8 @@
 # Imports
-import os
-import sys
 import gradio as gr
 import spaces
 import torch
 import librosa
 from PIL import Image
 from decord import VideoReader, cpu
@@ -32,12 +31,19 @@ footer {
 }
 '''
-def encode_video(video_path):
-    def uniform_sample(idxs, n):
-        gap = len(idxs) / n
-        return [idxs[int(i*gap + gap/2)] for i in range(n)]
-    vr = VideoReader(video_path, ctx=cpu(0))
     fps = round(vr.get_avg_fps())
     idxs = list(range(0, len(vr), fps))
     if len(idxs) > MAX_FRAMES:
@@ -45,29 +51,43 @@ def encode_video(video_path):
     frames = vr.get_batch(idxs).asnumpy()
     return [Image.fromarray(f.astype("uint8")) for f in frames]
 @spaces.GPU(duration=60)
-def generate(input=[], instruction=DEFAULT_INPUT, sampling=False, temperature=0.7, top_p=0.8, top_k=100, repetition_penalty=1.05, max_tokens=512):
     print(input)
     print(instruction)
-    content = []
-    if not input:
         return "No input provided."
-    for path in input:
-        ext = os.path.splitext(path)[1].lower()
-        if ext in [".jpg", ".jpeg", ".png", ".bmp", ".gif"]:
-            img = Image.open(path).convert("RGB")
-            content.append(img)
-        elif ext in [".mp4", ".mov", ".avi", ".mkv"]:
-            frames = encode_video(path)
-            content.extend(frames)
-        elif ext in [".wav", ".mp3", ".flac", ".aac"]:
-            aud, _ = librosa.load(path, sr=16000, mono=True)
-            content.append(aud)
-        else:
-            continue
     content.append(instruction)
     inputs_payload = [{"role": "user", "content": content}]
@@ -95,7 +115,7 @@ def cloud():
 # Initialize
 with gr.Blocks(css=css) as main:
     with gr.Column():
-        input = gr.File(label="Input", file_count="multiple", file_types=["image", "video", "audio"], type="filepath", allow_reordering=True)
         instruction = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Instruction")
         sampling = gr.Checkbox(value=False, label="Sampling")
         temperature = gr.Slider(minimum=0.01, maximum=1.99, step=0.01, value=0.7, label="Temperature")

 # Imports
 import gradio as gr
 import spaces
 import torch
+import os
 import librosa
 from PIL import Image
 from decord import VideoReader, cpu
 }
 '''
+filetypes = {  # category name -> lowercase file extensions (leading dot) accepted for that category
+    "Image": [".jpg", ".jpeg", ".png", ".bmp"],  # opened as a single RGB image by generate()
+    "Gif": [".gif"],  # kept apart from "Image": generate() decodes it frame by frame via encode_gif()
+    "Video": [".mp4", ".mov", ".avi", ".mkv"],  # decoded into frames via encode_video()
+    "Audio": [".wav", ".mp3", ".flac", ".aac"],  # loaded with librosa at 16 kHz mono by generate()
+}
def uniform_sample(idxs, n):
    """Return ``n`` items spread evenly across ``idxs``.

    The sequence is divided into ``n`` equal-width segments and the item at
    the midpoint of each segment is taken, so the picks are centred rather
    than anchored at the first element.

    Args:
        idxs: Sequence to sample from; must support ``len()`` and integer
            indexing (a list of frame indices or a list of frames).
        n: Number of items to pick. When ``n`` exceeds ``len(idxs)`` some
            items are repeated.

    Returns:
        A list of ``n`` items, or an empty list when ``n <= 0`` or ``idxs``
        is empty.
    """
    total = len(idxs)
    # Guard the degenerate cases: n == 0 would divide by zero below, and an
    # empty sequence would be indexed at position 0. An explicit length test
    # is used (not truthiness) so array-like inputs are handled as well.
    if n <= 0 or total == 0:
        return []
    gap = total / n
    # i * gap is the start of segment i; adding gap / 2 lands on its midpoint.
    return [idxs[int(i * gap + gap / 2)] for i in range(n)]
+def encode_video(path):
+    vr = VideoReader(path, ctx=cpu(0))
     fps = round(vr.get_avg_fps())
     idxs = list(range(0, len(vr), fps))
     if len(idxs) > MAX_FRAMES:
     frames = vr.get_batch(idxs).asnumpy()
     return [Image.fromarray(f.astype("uint8")) for f in frames]
def encode_gif(path):
    """Decode a GIF into a list of RGB frames, capped at ``MAX_FRAMES``.

    Args:
        path: Filesystem path of the GIF file to read.

    Returns:
        A list of RGB ``PIL.Image`` frames in playback order. When the GIF
        holds more than ``MAX_FRAMES`` frames, the list is thinned to
        ``MAX_FRAMES`` evenly spaced frames via ``uniform_sample``.
    """
    # Imported here because the module-level imports visible for this file
    # only bring in ``Image`` from PIL; without this the iterator lookup
    # below raises NameError.
    from PIL import ImageSequence

    # The context manager releases the file handle once decoding is done.
    # Each frame is copied and converted inside the block, so the returned
    # images do not depend on the closed file.
    with Image.open(path) as img:
        frames = [frame.copy().convert("RGB") for frame in ImageSequence.Iterator(img)]
    if len(frames) > MAX_FRAMES:
        frames = uniform_sample(frames, MAX_FRAMES)
    return frames
 @spaces.GPU(duration=60)
+def generate(input, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7, top_p=0.8, top_k=100, repetition_penalty=1.05, max_tokens=512):
     print(input)
     print(instruction)
+    if not input_file:
         return "No input provided."
+    extension = os.path.splitext(input_file)[1].lower()
+    filetype = None
+    for category, extensions in filetypes.items():
+        if extension in extensions:
+            filetype = category
+            break
+    content = []
+    if filetype == "Image":
+        image = Image.open(input_file).convert("RGB")
+        content.append(image)
+    elif filetype in ["Video", "Gif"]:
+        frames = encode_gif(input_file) if filetype == "Gif" else encode_video(input_file)
+        content.extend(frames)
+        audio = librosa.load(input_file, sr=16000, mono=True)
+        content.append(audio)
+    elif filetype == "Audio":
+        audio = librosa.load(input_file, sr=16000, mono=True)
+        content.append(audio)
+    else:
+        return "Unsupported file type."
     content.append(instruction)
     inputs_payload = [{"role": "user", "content": content}]
 # Initialize
 with gr.Blocks(css=css) as main:
     with gr.Column():
+        input = gr.File(label="Input", file_types=["image", "video", "audio"], type="filepath")
         instruction = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Instruction")
         sampling = gr.Checkbox(value=False, label="Sampling")
         temperature = gr.Slider(minimum=0.01, maximum=1.99, step=0.01, value=0.7, label="Temperature")