Spaces:

AIML-TUDA
/

LlavaGuard

Running on Zero

App Files Files Community

LukasHug committed 4 days ago

Commit

dda3db7

verified ·

1 Parent(s): 98977e3

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -44

app.py CHANGED Viewed

@@ -130,11 +130,7 @@ class SimpleConversation:
 default_conversation = SimpleConversation()
-# Model and processor storage
-tokenizer = None
-model = None
-processor = None
-context_len = 8048
 def wrap_taxonomy(text):
@@ -150,42 +146,6 @@ enable_btn = gr.Button(interactive=True)
 disable_btn = gr.Button(interactive=False)
-# Model loading function
-@spaces.GPU
-def load_model(model_path):
-    global tokenizer, model, processor, context_len
-    logger.info(f"Loading model: {model_path}")
-    try:
-        # Check if it's a Qwen model
-        if "qwenguard" in model_path.lower():
-            model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
-                model_path,
-                torch_dtype="auto",
-                device_map="auto"
-            )
-            processor = AutoProcessor.from_pretrained(model_path)
-            tokenizer = processor.tokenizer
-        # Otherwise assume it's a LlavaGuard model
-        else:
-            model = LlavaOnevisionForConditionalGeneration.from_pretrained(
-                model_path,
-                torch_dtype="auto",
-                device_map="auto",
-                trust_remote_code=True
-            )
-            tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-            processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
-        context_len = getattr(model.config, "max_position_embeddings", 8048)
-        logger.info(f"Model {model_path} loaded successfully")
-        return  # Remove return value to avoid Gradio warnings
-    except Exception as e:
-        logger.error(f"Error loading model {model_path}: {str(e)}")
-        return  # Remove return value to avoid Gradio warnings
 def get_conv_log_filename():
@@ -198,8 +158,6 @@ def get_conv_log_filename():
 # Inference function
 @spaces.GPU
 def run_inference(prompt, image, temperature=0.2, top_p=0.95, max_tokens=512):
-    global model, tokenizer, processor
     if model is None or processor is None:
         return "Model not loaded. Please wait for model to initialize."
     try:
@@ -622,7 +580,33 @@ if api_key:
     logger.info("Logged in to Hugging Face Hub")
 # Load model at startup
-load_model(DEFAULT_MODEL)
 demo = build_demo(embed_mode=args.embed, cur_dir='./', concurrency_count=args.concurrency_count)
 demo.queue(

 default_conversation = SimpleConversation()
 def wrap_taxonomy(text):
 disable_btn = gr.Button(interactive=False)
 def get_conv_log_filename():
 # Inference function
 @spaces.GPU
 def run_inference(prompt, image, temperature=0.2, top_p=0.95, max_tokens=512):
     if model is None or processor is None:
         return "Model not loaded. Please wait for model to initialize."
     try:
     logger.info("Logged in to Hugging Face Hub")
 # Load model at startup
+# NOTE: `@spaces.GPU` is a decorator and is only valid directly above a
+# `def`/`class`; placing it above an assignment is a SyntaxError, so the
+# model is loaded here at module level without it.
+model_path = DEFAULT_MODEL
+logger.info(f"Loading model: {model_path}")
+# Check if it's a Qwen model
+if "qwenguard" in model_path.lower():
+    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+        model_path,
+        torch_dtype="auto",
+        device_map="auto"
+    )
+    processor = AutoProcessor.from_pretrained(model_path)
+    # Reuse the tokenizer exposed by the processor.
+    tokenizer = processor.tokenizer
+# Otherwise assume it's a LlavaGuard model
+else:
+    model = LlavaOnevisionForConditionalGeneration.from_pretrained(
+        model_path,
+        torch_dtype="auto",
+        device_map="auto",
+        trust_remote_code=True
+    )
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
+# Fall back to 8048 when the config does not define max_position_embeddings.
+context_len = getattr(model.config, "max_position_embeddings", 8048)
+logger.info(f"Model {model_path} loaded successfully")
 demo = build_demo(embed_mode=args.embed, cur_dir='./', concurrency_count=args.concurrency_count)
 demo.queue(