prithivMLmods committed (verified)
Commit cdaa0c6 · 1 Parent(s): 2aa26ec

Update app.py

Files changed (1)
  1. app.py +1 -22
app.py CHANGED
@@ -22,11 +22,6 @@ from transformers import (
 )
 from transformers.image_utils import load_image
 
-import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
-
-from io import BytesIO
-
 # Constants for text generation
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
@@ -61,16 +56,6 @@ model_o = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to(device).eval()
 
-# Load SmolVLM2-2.2B-Instruct
-MODEL_ID_W = "HuggingFaceTB/SmolVLM2-2.2B-Instruct"
-processor_w = AutoProcessor.from_pretrained(MODEL_ID_W, trust_remote_code=True)
-model_w= AutoModelForImageTextToText.from_pretrained(
-    MODEL_ID_W,
-    trust_remote_code=True,
-    _attn_implementation="flash_attention_2",
-    torch_dtype=torch.float16
-).to(device).eval()
-
 def downsample_video(video_path):
     """
     Downsamples the video to evenly spaced frames.
@@ -111,9 +96,6 @@ def generate_image(model_name: str, text: str, image: Image.Image,
     elif model_name == "olmOCR-7B-0225":
         processor = processor_o
         model = model_o
-    elif model_name == "SmolVLM2":
-        processor = processor_w
-        model = model_w
     else:
         yield "Invalid model selected."
         return
@@ -168,9 +150,6 @@ def generate_video(model_name: str, text: str, video_path: str,
     elif model_name == "olmOCR-7B-0225":
         processor = processor_o
         model = model_o
-    elif model_name == "SmolVLM2":
-        processor = processor_w
-        model = model_w
     else:
         yield "Invalid model selected."
         return
@@ -269,7 +248,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
         with gr.Column():
             output = gr.Textbox(label="Output", interactive=False, lines=2, scale=2)
             model_choice = gr.Radio(
-                choices=["VIREX-062225-exp", "DREX-062225-exp", "olmOCR-7B-0225", "SmolVLM2"],
+                choices=["VIREX-062225-exp", "DREX-062225-exp", "olmOCR-7B-0225"],
                 label="Select Model",
                 value="VIREX-062225-exp"
             )
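For reference, the models that stay in app.py are loaded once at startup and then dispatched by the radio-button value inside generate_image / generate_video, as the unchanged context lines above show. Below is a minimal sketch of that pattern, not the actual file: the checkpoint id and the resolve() helper are assumptions for illustration, while the float16 / .to(device).eval() handling follows the diff context.

import torch
from transformers import AutoProcessor, Qwen2VLForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load once at startup; the checkpoint id here is an assumption for illustration,
# app.py defines its own MODEL_ID_* constants.
MODEL_ID_O = "allenai/olmOCR-7B-0225-preview"
processor_o = AutoProcessor.from_pretrained(MODEL_ID_O, trust_remote_code=True)
model_o = Qwen2VLForConditionalGeneration.from_pretrained(
    MODEL_ID_O,
    trust_remote_code=True,
    torch_dtype=torch.float16,
).to(device).eval()

def resolve(model_name: str):
    """Map the gr.Radio choice to a (processor, model) pair, mirroring the elif chain in the diff."""
    if model_name == "olmOCR-7B-0225":
        return processor_o, model_o
    raise ValueError("Invalid model selected.")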