htian01 committed
Commit e82e1d3 · verified · 1 Parent(s): 2630ff3

Update app.py

Files changed (1):
  app.py +4 -48
app.py CHANGED
@@ -8,7 +8,7 @@ import spaces
 import cv2
 import numpy as np
 from PIL import Image
-
+import html  # <--- 1. Import the html module
 def progress_bar_html(label: str) -> str:
     """
     Returns an HTML snippet for a thin progress bar with a label.
@@ -68,52 +68,6 @@ def model_inference(input_dict, history):
     text = input_dict["text"]
     files = input_dict["files"]
 
-    if text.strip().lower().startswith("@video-infer"):
-        # Remove the tag from the query.
-        text = text[len("@video-infer"):].strip()
-        if not files:
-            gr.Error("Please upload a video file along with your @video-infer query.")
-            return
-        # Assume the first file is a video.
-        video_path = files[0]
-        frames = downsample_video(video_path)
-        if not frames:
-            gr.Error("Could not process video.")
-            return
-        # Build messages: start with the text prompt.
-        messages = [
-            {
-                "role": "user",
-                "content": [{"type": "text", "text": text}]
-            }
-        ]
-        # Append each frame with a timestamp label.
-        for image, timestamp in frames:
-            messages[0]["content"].append({"type": "text", "text": f"Frame {timestamp}:"})
-            messages[0]["content"].append({"type": "image", "image": image})
-        # Collect only the images from the frames.
-        video_images = [image for image, _ in frames]
-        # Prepare the prompt.
-        prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        inputs = processor(
-            text=[prompt],
-            images=video_images,
-            return_tensors="pt",
-            padding=True,
-        ).to("cuda")
-        # Set up streaming generation.
-        streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=False)
-        generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=1024)
-        thread = Thread(target=model.generate, kwargs=generation_kwargs)
-        thread.start()
-        buffer = ""
-        yield progress_bar_html("Processing video with MiMo-VL-7B-RL Model")
-        for new_text in streamer:
-            buffer += new_text
-            time.sleep(0.01)
-            yield buffer
-        return
-
     if len(files) > 1:
         images = [load_image(image) for image in files]
     elif len(files) == 1:
@@ -151,7 +105,9 @@ def model_inference(input_dict, history):
     buffer = ""
     yield progress_bar_html("Processing with MiMo-VL-7B-RL Model")
     for new_text in streamer:
-        buffer += new_text
+        escaped_new_text = html.escape(new_text)
+        buffer += escaped_new_text
+
         time.sleep(0.01)
         yield buffer
 
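For context, a minimal sketch of the pattern the added lines adopt, not the app's actual code: stream_escaped is a hypothetical helper, and the chunks argument stands in for the TextIteratorStreamer in app.py. Each streamed chunk is passed through html.escape before being appended to the buffer, so markup emitted by the model renders as literal text in the chat UI instead of being interpreted as HTML.

import html
import time

def stream_escaped(chunks):
    """Yield a growing buffer in which every chunk is HTML-escaped first."""
    buffer = ""
    for new_text in chunks:
        buffer += html.escape(new_text)  # e.g. "<div>" becomes "&lt;div&gt;"
        time.sleep(0.01)                 # mirrors the pacing delay in app.py
        yield buffer

# Usage: a tag in the model output survives as visible text.
out = ""
for out in stream_escaped(["Use ", "<div>", " sparingly"]):
    pass
print(out)  # Use &lt;div&gt; sparingly

Escaping per chunk is safe here because html.escape works character by character, so escaping each piece and concatenating gives the same result as escaping the concatenated whole.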