Staticaliza committed on
Commit
7820541
·
verified ·
1 Parent(s): 4cab0f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -11
app.py CHANGED
@@ -4,7 +4,7 @@ import spaces
4
  import torch
5
  import os
6
  import librosa
7
- from PIL import Image
8
  from decord import VideoReader, cpu
9
  from transformers import AutoModel, AutoTokenizer, AutoProcessor
10
 
@@ -31,9 +31,16 @@ footer {
31
  }
32
  '''
33
 
 
 
 
 
 
 
 
34
  filetypes = {
35
  "Image": [".jpg", ".jpeg", ".png", ".bmp"],
36
- "Gif": [".gif"],
37
  "Video": [".mp4", ".mov", ".avi", ".mkv"],
38
  "Audio": [".wav", ".mp3", ".flac", ".aac"],
39
  }
@@ -60,14 +67,13 @@ def encode_gif(path):
60
 
61
  @spaces.GPU(duration=60)
62
  def generate(input, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7, top_p=0.8, top_k=100, repetition_penalty=1.05, max_tokens=512):
63
-
64
  print(input)
65
  print(instruction)
66
 
67
- if not input_file:
68
  return "No input provided."
69
 
70
- extension = os.path.splitext(input_file)[1].lower()
71
  filetype = None
72
  for category, extensions in filetypes.items():
73
  if extension in extensions:
@@ -76,20 +82,22 @@ def generate(input, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7,
76
 
77
  content = []
78
  if filetype == "Image":
79
- image = Image.open(input_file).convert("RGB")
80
  content.append(image)
81
- elif filetype in ["Video", "Gif"]:
82
- frames = encode_gif(input_file) if filetype == "Gif" else encode_video(input_file)
83
  content.extend(frames)
84
- audio = librosa.load(input_file, sr=16000, mono=True)
85
  content.append(audio)
86
  elif filetype == "Audio":
87
- audio = librosa.load(input_file, sr=16000, mono=True)
88
  content.append(audio)
89
  else:
90
  return "Unsupported file type."
91
 
92
- content.append(instruction)
 
 
93
  inputs_payload = [{"role": "user", "content": content}]
94
 
95
  params = {
 
4
  import torch
5
  import os
6
  import librosa
7
+ from PIL import Image, ImageSequence
8
  from decord import VideoReader, cpu
9
  from transformers import AutoModel, AutoTokenizer, AutoProcessor
10
 
 
31
  }
32
  '''
33
 
34
+ input_prefixes = {
35
+ "Image": "(A image file called █ has been attached) ",
36
+ "GIF": "(A GIF file called █ has been attached) ",
37
+ "Video": "(A video with audio file called █ has been attached) ",
38
+ "Audio": "(A audio file called █ has been attached) ",
39
+ }
40
+
41
  filetypes = {
42
  "Image": [".jpg", ".jpeg", ".png", ".bmp"],
43
+ "GIF": [".gif"],
44
  "Video": [".mp4", ".mov", ".avi", ".mkv"],
45
  "Audio": [".wav", ".mp3", ".flac", ".aac"],
46
  }
 
67
 
68
  @spaces.GPU(duration=60)
69
  def generate(input, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7, top_p=0.8, top_k=100, repetition_penalty=1.05, max_tokens=512):
 
70
  print(input)
71
  print(instruction)
72
 
73
+ if not input:
74
  return "No input provided."
75
 
76
+ extension = os.path.splitext(input)[1].lower()
77
  filetype = None
78
  for category, extensions in filetypes.items():
79
  if extension in extensions:
 
82
 
83
  content = []
84
  if filetype == "Image":
85
+ image = Image.open(input).convert("RGB")
86
  content.append(image)
87
+ elif filetype in ["Video", "GIF"]:
88
+ frames = encode_gif(input) if filetype == "GIF" else encode_video(input_file)
89
  content.extend(frames)
90
+ audio = librosa.load(input, sr=16000, mono=True)
91
  content.append(audio)
92
  elif filetype == "Audio":
93
+ audio = librosa.load(input, sr=16000, mono=True)
94
  content.append(audio)
95
  else:
96
  return "Unsupported file type."
97
 
98
+ filename = os.path.basename(input_file)
99
+ prefix = input_prefixes[filetype].replace("█", filename)
100
+ content.append(prefix + instruction)
101
  inputs_payload = [{"role": "user", "content": content}]
102
 
103
  params = {