Jiaqi-hkust committed on
Commit
aac9acb
·
verified ·
1 Parent(s): 53f4461

Update hawk/conversation/conversation_video.py

Browse files
hawk/conversation/conversation_video.py CHANGED
@@ -8,7 +8,6 @@ from PIL import Image
8
  import sys
9
  import os
10
  import torch
11
- import spaces
12
 
13
  from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer
14
  from transformers import StoppingCriteria, StoppingCriteriaList
@@ -22,6 +21,7 @@ from hawk.processors.video_processor import ToTHWC,ToUint8,load_video,load_video
22
  from hawk.processors import Blip2ImageEvalProcessor
23
 
24
  from hawk.models.ImageBind.data import load_and_transform_audio_data
 
25
  class SeparatorStyle(Enum):
26
  """Different separator style."""
27
  SINGLE = auto()
@@ -178,7 +178,6 @@ class Chat:
178
  # torch.tensor([2277, 29937]).to(self.device)] # '###' can be encoded in two different ways.
179
  # self.stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
180
 
181
- @spaces.GPU
182
  def ask(self, text, conv):
183
  if len(conv.messages) > 0 and conv.messages[-1][0] == conv.roles[0] \
184
  and ('</Video>' in conv.messages[-1][1] or '</Image>' in conv.messages[-1][1]): # last message is image.
@@ -186,7 +185,6 @@ class Chat:
186
  else:
187
  conv.append_message(conv.roles[0], text)
188
 
189
- @spaces.GPU
190
  def answer(self, conv, img_list, max_new_tokens=300, num_beams=1, min_length=1, top_p=0.9,
191
  repetition_penalty=1.0, length_penalty=1, temperature=1.0, max_length=2000):
192
  conv.append_message(conv.roles[1], None)
@@ -237,7 +235,6 @@ class Chat:
237
  conv.messages[-1][1] = output_text
238
  return output_text, output_token.cpu().numpy()
239
 
240
- @spaces.GPU
241
  def upload_video(self, video_path, conv, img_list):
242
 
243
  msg = ""
@@ -285,7 +282,6 @@ class Chat:
285
  conv.append_message(conv.roles[0], "<Video><ImageHere></Video> "+ msg)
286
  return "Received."
287
 
288
- @spaces.GPU
289
  def upload_video_without_audio(self, video_path, conv, img_list):
290
  msg = ""
291
  if isinstance(video_path, str): # is a video path
@@ -326,7 +322,6 @@ class Chat:
326
  conv.append_message(conv.roles[0], "<Video><ImageHere></Video> ")
327
  return "Received."
328
 
329
- @spaces.GPU
330
  def upload_img(self, image, conv, img_list):
331
 
332
  msg = ""
@@ -350,7 +345,6 @@ class Chat:
350
 
351
  return "Received."
352
 
353
- @spaces.GPU
354
  def get_context_emb(self, conv, img_list):
355
  prompt = conv.get_prompt()
356
  prompt_segs = prompt.split('<ImageHere>')
 
8
  import sys
9
  import os
10
  import torch
 
11
 
12
  from transformers import AutoTokenizer, AutoModelForCausalLM, LlamaTokenizer
13
  from transformers import StoppingCriteria, StoppingCriteriaList
 
21
  from hawk.processors import Blip2ImageEvalProcessor
22
 
23
  from hawk.models.ImageBind.data import load_and_transform_audio_data
24
+
25
  class SeparatorStyle(Enum):
26
  """Different separator style."""
27
  SINGLE = auto()
 
178
  # torch.tensor([2277, 29937]).to(self.device)] # '###' can be encoded in two different ways.
179
  # self.stopping_criteria = StoppingCriteriaList([StoppingCriteriaSub(stops=stop_words_ids)])
180
 
 
181
  def ask(self, text, conv):
182
  if len(conv.messages) > 0 and conv.messages[-1][0] == conv.roles[0] \
183
  and ('</Video>' in conv.messages[-1][1] or '</Image>' in conv.messages[-1][1]): # last message is image.
 
185
  else:
186
  conv.append_message(conv.roles[0], text)
187
 
 
188
  def answer(self, conv, img_list, max_new_tokens=300, num_beams=1, min_length=1, top_p=0.9,
189
  repetition_penalty=1.0, length_penalty=1, temperature=1.0, max_length=2000):
190
  conv.append_message(conv.roles[1], None)
 
235
  conv.messages[-1][1] = output_text
236
  return output_text, output_token.cpu().numpy()
237
 
 
238
  def upload_video(self, video_path, conv, img_list):
239
 
240
  msg = ""
 
282
  conv.append_message(conv.roles[0], "<Video><ImageHere></Video> "+ msg)
283
  return "Received."
284
 
 
285
  def upload_video_without_audio(self, video_path, conv, img_list):
286
  msg = ""
287
  if isinstance(video_path, str): # is a video path
 
322
  conv.append_message(conv.roles[0], "<Video><ImageHere></Video> ")
323
  return "Received."
324
 
 
325
  def upload_img(self, image, conv, img_list):
326
 
327
  msg = ""
 
345
 
346
  return "Received."
347
 
 
348
  def get_context_emb(self, conv, img_list):
349
  prompt = conv.get_prompt()
350
  prompt_segs = prompt.split('<ImageHere>')