prithivMLmods committed
Commit a4a6abd · verified · 1 Parent(s): e363aa0

Update app.py

Files changed (1): app.py (+17 -11)
app.py CHANGED
@@ -17,10 +17,15 @@ from transformers import (
     Qwen2_5_VLForConditionalGeneration,
     Qwen2VLForConditionalGeneration,
     AutoProcessor,
+    AutoTokenizer,
     TextIteratorStreamer,
 )
 from transformers.image_utils import load_image
 
+import subprocess
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+
+
 # Constants for text generation
 MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
@@ -46,8 +51,8 @@ model_x = Qwen2VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to(device).eval()
 
-# Load Relaxed
-MODEL_ID_Z = "Qwen/Qwen2.5-VL-3B-Instruct"
+# Load Space Thinker
+MODEL_ID_Z = "remyxai/SpaceThinker-Qwen2.5VL-3B"
 processor_z = AutoProcessor.from_pretrained(MODEL_ID_Z, trust_remote_code=True)
 model_z = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_Z,
@@ -55,12 +60,13 @@ model_z = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     torch_dtype=torch.float16
 ).to(device).eval()
 
-# Load ImageScope
-MODEL_ID_T = "prithivMLmods/Imgscope-OCR-2B-0527"
-processor_t = AutoProcessor.from_pretrained(MODEL_ID_X, trust_remote_code=True)
-model_t = Qwen2VLForConditionalGeneration.from_pretrained(
+# Load moondream
+MODEL_ID_T = "moondream/moondream-2b-2025-04-14-4bit"
+processor_t = AutoTokenizer.from_pretrained(MODEL_ID_X, trust_remote_code=True)
+model_t = AutoModelForCausalLM.from_pretrained(
     MODEL_ID_T,
     trust_remote_code=True,
+    attn_implementation="flash_attention_2",
     torch_dtype=torch.float16
 ).to(device).eval()
 
@@ -103,10 +109,10 @@ def generate_image(model_name: str, text: str, image: Image.Image,
     elif model_name == "coreOCR-7B-050325-preview":
         processor = processor_x
         model = model_x
-    elif model_name == "Qwen2.5-VL-3B":
+    elif model_name == "SpaceThinker-Qwen2.5VL-3B":
         processor = processor_z
         model = model_z
-    elif model_name == "Imgscope-OCR-2B":
+    elif model_name == "moondream-2b-2025-04-14-4bit":
         processor = processor_t
         model = model_t
     else:
@@ -159,10 +165,10 @@ def generate_video(model_name: str, text: str, video_path: str,
     elif model_name == "coreOCR-7B-050325-preview":
         processor = processor_x
         model = model_x
-    elif model_name == "Qwen2.5-VL-3B":
+    elif model_name == "SpaceThinker-Qwen2.5VL-3B":
         processor = processor_z
         model = model_z
-    elif model_name == "Imgscope-OCR-2B":
+    elif model_name == "moondream-2b-2025-04-14-4bit":
         processor = processor_t
         model = model_t
     else:
@@ -262,7 +268,7 @@ with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
     with gr.Column():
         output = gr.Textbox(label="Output", interactive=False, lines=2, scale=2)
         model_choice = gr.Radio(
-            choices=["coreOCR-7B-050325-preview", "typhoon-ocr-7b", "Qwen2.5-VL-3B", "Imgscope-OCR-2B"],
+            choices=["coreOCR-7B-050325-preview", "typhoon-ocr-7b", "SpaceThinker-Qwen2.5VL-3B", "moondream-2b-2025-04-14-4bit"],
             label="Select Model",
             value="coreOCR-7B-050325-preview"
        )
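
For quick reference, below is a minimal, self-contained sketch of the new moondream loading path introduced by this commit. It only mirrors the hunk above; the MODEL_ID_X reference on the processor line is assumed to be a typo for MODEL_ID_T, and the imports plus the device variable are filled in here solely to make the sketch runnable. The flash_attention_2 setting presumes the flash-attn wheel installed by the subprocess call added at the top of the file.

# Minimal sketch, not the full app: mirrors the moondream loading code from this commit.
# Assumption: MODEL_ID_X in the diff is treated as a typo for MODEL_ID_T.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

device = "cuda" if torch.cuda.is_available() else "cpu"

MODEL_ID_T = "moondream/moondream-2b-2025-04-14-4bit"
processor_t = AutoTokenizer.from_pretrained(MODEL_ID_T, trust_remote_code=True)
model_t = AutoModelForCausalLM.from_pretrained(
    MODEL_ID_T,
    trust_remote_code=True,
    attn_implementation="flash_attention_2",  # requires flash-attn to be installed
    torch_dtype=torch.float16,
).to(device).eval()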