Spaces:
Runtime error
Runtime error
Update hawk/conversation/conversation_video.py
Browse files
hawk/conversation/conversation_video.py
CHANGED
@@ -8,7 +8,6 @@ from PIL import Image
|
|
8 |
import sys
|
9 |
import os
|
10 |
import torch
|
11 |
-
import spaces
|
12 |
|
13 |
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer
|
14 |
from transformers import StoppingCriteria, StoppingCriteriaList
|
@@ -22,6 +21,7 @@ from hawk.processors.video_processor import ToTHWC,ToUint8,load_video,load_video
|
|
22 |
from hawk.processors import Blip2ImageEvalProcessor
|
23 |
|
24 |
from hawk.models.ImageBind.data import load_and_transform_audio_data
|
|
|
25 |
class SeparatorStyle(Enum):
|
26 |
"""Different separator style."""
|
27 |
SINGLE = auto()
|
@@ -178,7 +178,6 @@ class Chat:
|
|
178 |
# torch.tensor([2277, 29937]).to(self.device)] # '###' can be encoded in two different ways.
|
179 |
# self.stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
|
180 |
|
181 |
-
@spaces.GPU
|
182 |
def ask(self, text, conv):
|
183 |
if len(conv.messages) > 0 and conv.messages[-1][0] == conv.roles[0] \
|
184 |
and ('</Video>' in conv.messages[-1][1] or '</Image>' in conv.messages[-1][1]): # last message is image.
|
@@ -186,7 +185,6 @@ class Chat:
|
|
186 |
else:
|
187 |
conv.append_message(conv.roles[0], text)
|
188 |
|
189 |
-
@spaces.GPU
|
190 |
def answer(self, conv, img_list, max_new_tokens=300, num_beams=1, min_length=1, top_p=0.9,
|
191 |
repetition_penalty=1.0, length_penalty=1, temperature=1.0, max_length=2000):
|
192 |
conv.append_message(conv.roles[1], None)
|
@@ -237,7 +235,6 @@ class Chat:
|
|
237 |
conv.messages[-1][1] = output_text
|
238 |
return output_text, output_token.cpu().numpy()
|
239 |
|
240 |
-
@spaces.GPU
|
241 |
def upload_video(self, video_path, conv, img_list):
|
242 |
|
243 |
msg = ""
|
@@ -285,7 +282,6 @@ class Chat:
|
|
285 |
conv.append_message(conv.roles[0], "<Video><ImageHere></Video> "+ msg)
|
286 |
return "Received."
|
287 |
|
288 |
-
@spaces.GPU
|
289 |
def upload_video_without_audio(self, video_path, conv, img_list):
|
290 |
msg = ""
|
291 |
if isinstance(video_path, str): # is a video path
|
@@ -326,7 +322,6 @@ class Chat:
|
|
326 |
conv.append_message(conv.roles[0], "<Video><ImageHere></Video> ")
|
327 |
return "Received."
|
328 |
|
329 |
-
@spaces.GPU
|
330 |
def upload_img(self, image, conv, img_list):
|
331 |
|
332 |
msg = ""
|
@@ -350,7 +345,6 @@ class Chat:
|
|
350 |
|
351 |
return "Received."
|
352 |
|
353 |
-
@spaces.GPU
|
354 |
def get_context_emb(self, conv, img_list):
|
355 |
prompt = conv.get_prompt()
|
356 |
prompt_segs = prompt.split('<ImageHere>')
|
|
|
8 |
import sys
|
9 |
import os
|
10 |
import torch
|
|
|
11 |
|
12 |
from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer
|
13 |
from transformers import StoppingCriteria, StoppingCriteriaList
|
|
|
21 |
from hawk.processors import Blip2ImageEvalProcessor
|
22 |
|
23 |
from hawk.models.ImageBind.data import load_and_transform_audio_data
|
24 |
+
|
25 |
class SeparatorStyle(Enum):
|
26 |
"""Different separator style."""
|
27 |
SINGLE = auto()
|
|
|
178 |
# torch.tensor([2277, 29937]).to(self.device)] # '###' can be encoded in two different ways.
|
179 |
# self.stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
|
180 |
|
|
|
181 |
def ask(self, text, conv):
|
182 |
if len(conv.messages) > 0 and conv.messages[-1][0] == conv.roles[0] \
|
183 |
and ('</Video>' in conv.messages[-1][1] or '</Image>' in conv.messages[-1][1]): # last message is image.
|
|
|
185 |
else:
|
186 |
conv.append_message(conv.roles[0], text)
|
187 |
|
|
|
188 |
def answer(self, conv, img_list, max_new_tokens=300, num_beams=1, min_length=1, top_p=0.9,
|
189 |
repetition_penalty=1.0, length_penalty=1, temperature=1.0, max_length=2000):
|
190 |
conv.append_message(conv.roles[1], None)
|
|
|
235 |
conv.messages[-1][1] = output_text
|
236 |
return output_text, output_token.cpu().numpy()
|
237 |
|
|
|
238 |
def upload_video(self, video_path, conv, img_list):
|
239 |
|
240 |
msg = ""
|
|
|
282 |
conv.append_message(conv.roles[0], "<Video><ImageHere></Video> "+ msg)
|
283 |
return "Received."
|
284 |
|
|
|
285 |
def upload_video_without_audio(self, video_path, conv, img_list):
|
286 |
msg = ""
|
287 |
if isinstance(video_path, str): # is a video path
|
|
|
322 |
conv.append_message(conv.roles[0], "<Video><ImageHere></Video> ")
|
323 |
return "Received."
|
324 |
|
|
|
325 |
def upload_img(self, image, conv, img_list):
|
326 |
|
327 |
msg = ""
|
|
|
345 |
|
346 |
return "Received."
|
347 |
|
|
|
348 |
def get_context_emb(self, conv, img_list):
|
349 |
prompt = conv.get_prompt()
|
350 |
prompt_segs = prompt.split('<ImageHere>')
|