Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -130,11 +130,7 @@ class SimpleConversation:
|
|
130 |
|
131 |
default_conversation = SimpleConversation()
|
132 |
|
133 |
-
|
134 |
-
tokenizer = None
|
135 |
-
model = None
|
136 |
-
processor = None
|
137 |
-
context_len = 8048
|
138 |
|
139 |
|
140 |
def wrap_taxonomy(text):
|
@@ -150,42 +146,6 @@ enable_btn = gr.Button(interactive=True)
|
|
150 |
disable_btn = gr.Button(interactive=False)
|
151 |
|
152 |
|
153 |
-
# Model loading function
|
154 |
-
@spaces.GPU
|
155 |
-
def load_model(model_path):
|
156 |
-
global tokenizer, model, processor, context_len
|
157 |
-
|
158 |
-
logger.info(f"Loading model: {model_path}")
|
159 |
-
|
160 |
-
try:
|
161 |
-
# Check if it's a Qwen model
|
162 |
-
if "qwenguard" in model_path.lower():
|
163 |
-
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
164 |
-
model_path,
|
165 |
-
torch_dtype="auto",
|
166 |
-
device_map="auto"
|
167 |
-
)
|
168 |
-
processor = AutoProcessor.from_pretrained(model_path)
|
169 |
-
tokenizer = processor.tokenizer
|
170 |
-
|
171 |
-
# Otherwise assume it's a LlavaGuard model
|
172 |
-
else:
|
173 |
-
model = LlavaOnevisionForConditionalGeneration.from_pretrained(
|
174 |
-
model_path,
|
175 |
-
torch_dtype="auto",
|
176 |
-
device_map="auto",
|
177 |
-
trust_remote_code=True
|
178 |
-
)
|
179 |
-
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
180 |
-
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
|
181 |
-
|
182 |
-
context_len = getattr(model.config, "max_position_embeddings", 8048)
|
183 |
-
logger.info(f"Model {model_path} loaded successfully")
|
184 |
-
return # Remove return value to avoid Gradio warnings
|
185 |
-
|
186 |
-
except Exception as e:
|
187 |
-
logger.error(f"Error loading model {model_path}: {str(e)}")
|
188 |
-
return # Remove return value to avoid Gradio warnings
|
189 |
|
190 |
|
191 |
def get_conv_log_filename():
|
@@ -198,8 +158,6 @@ def get_conv_log_filename():
|
|
198 |
# Inference function
|
199 |
@spaces.GPU
|
200 |
def run_inference(prompt, image, temperature=0.2, top_p=0.95, max_tokens=512):
|
201 |
-
global model, tokenizer, processor
|
202 |
-
|
203 |
if model is None or processor is None:
|
204 |
return "Model not loaded. Please wait for model to initialize."
|
205 |
try:
|
@@ -622,7 +580,33 @@ if api_key:
|
|
622 |
logger.info("Logged in to Hugging Face Hub")
|
623 |
|
624 |
# Load model at startup
|
625 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
626 |
|
627 |
demo = build_demo(embed_mode=args.embed, cur_dir='./', concurrency_count=args.concurrency_count)
|
628 |
demo.queue(
|
|
|
130 |
|
131 |
default_conversation = SimpleConversation()
|
132 |
|
133 |
+
|
|
|
|
|
|
|
|
|
134 |
|
135 |
|
136 |
def wrap_taxonomy(text):
|
|
|
146 |
disable_btn = gr.Button(interactive=False)
|
147 |
|
148 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
149 |
|
150 |
|
151 |
def get_conv_log_filename():
|
|
|
158 |
# Inference function
|
159 |
@spaces.GPU
|
160 |
def run_inference(prompt, image, temperature=0.2, top_p=0.95, max_tokens=512):
|
|
|
|
|
161 |
if model is None or processor is None:
|
162 |
return "Model not loaded. Please wait for model to initialize."
|
163 |
try:
|
|
|
580 |
logger.info("Logged in to Hugging Face Hub")
|
581 |
|
582 |
# Load model at startup
|
583 |
+
model_path = DEFAULT_MODEL
|
584 |
+
logger.info(f"Loading model: {model_path}")
|
585 |
+
# Check if it's a Qwen model
|
586 |
+
if "qwenguard" in model_path.lower():
|
587 |
+
@spaces.GPU
|
588 |
+
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
589 |
+
model_path,
|
590 |
+
torch_dtype="auto",
|
591 |
+
device_map="auto"
|
592 |
+
)
|
593 |
+
processor = AutoProcessor.from_pretrained(model_path)
|
594 |
+
tokenizer = processor.tokenizer
|
595 |
+
|
596 |
+
# Otherwise assume it's a LlavaGuard model
|
597 |
+
else:
|
598 |
+
@spaces.GPU
|
599 |
+
model = LlavaOnevisionForConditionalGeneration.from_pretrained(
|
600 |
+
model_path,
|
601 |
+
torch_dtype="auto",
|
602 |
+
device_map="auto",
|
603 |
+
trust_remote_code=True
|
604 |
+
)
|
605 |
+
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
|
606 |
+
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
|
607 |
+
|
608 |
+
context_len = getattr(model.config, "max_position_embeddings", 8048)
|
609 |
+
logger.info(f"Model {model_path} loaded successfully")
|
610 |
|
611 |
demo = build_demo(embed_mode=args.embed, cur_dir='./', concurrency_count=args.concurrency_count)
|
612 |
demo.queue(
|