Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -56,23 +56,19 @@ def generate(image, video, audio, instruction=DEFAULT_INPUT, sampling=False, tem
|
|
56 |
if image is not None:
|
57 |
img = Image.fromarray(image.astype("uint8"), "RGB")
|
58 |
inputs = [{"role": "user", "content": [img, instruction]}]
|
59 |
-
media = img
|
60 |
elif video is not None:
|
61 |
vid = encode_video(video)
|
62 |
-
inputs = [{"role": "user", "content":
|
63 |
-
media = vid
|
64 |
elif audio is not None:
|
65 |
if isinstance(audio, str):
|
66 |
aud, _ = librosa.load(audio, sr=16000, mono=True)
|
67 |
else:
|
68 |
aud = audio
|
69 |
inputs = [{"role": "user", "content": [aud, instruction]}]
|
70 |
-
media = aud
|
71 |
else:
|
72 |
return "No input provided."
|
73 |
|
74 |
params = {
|
75 |
-
"image": media,
|
76 |
"msgs": inputs,
|
77 |
"tokenizer": tokenizer,
|
78 |
#"processor": processor,
|
|
|
56 |
if image is not None:
|
57 |
img = Image.fromarray(image.astype("uint8"), "RGB")
|
58 |
inputs = [{"role": "user", "content": [img, instruction]}]
|
|
|
59 |
elif video is not None:
|
60 |
vid = encode_video(video)
|
61 |
+
inputs = [{"role": "user", "content": vid + [instruction]}]
|
|
|
62 |
elif audio is not None:
|
63 |
if isinstance(audio, str):
|
64 |
aud, _ = librosa.load(audio, sr=16000, mono=True)
|
65 |
else:
|
66 |
aud = audio
|
67 |
inputs = [{"role": "user", "content": [aud, instruction]}]
|
|
|
68 |
else:
|
69 |
return "No input provided."
|
70 |
|
71 |
params = {
|
|
|
72 |
"msgs": inputs,
|
73 |
"tokenizer": tokenizer,
|
74 |
#"processor": processor,
|