Spaces:

nguyentantoan
/

vintern-video-recognition

Sleeping

App Files Files Community

nguyentantoan committed on May 23

Commit

93ab632

verified ·

1 Parent(s): b5f11ba

Upload app.py

Browse files

Files changed (1) hide show

app.py +154 -0

app.py ADDED Viewed

	@@ -0,0 +1,154 @@

+import gradio as gr
+import torch
+from transformers import AutoModel, AutoTokenizer
+import torchvision.transforms as T
+from torchvision.transforms.functional import InterpolationMode
+from PIL import Image
+import base64
+import io
+import time
+# Setup
+device = "cpu"  # HF Spaces miễn phí chỉ có CPU
+model = None
+tokenizer = None
+transform = None
+def load_model():
+    global model, tokenizer, transform
+    try:
+        print("🤖 Loading Vintern-1B-v3.5...")
+        model_name = "5CD-AI/Vintern-1B-v3_5"
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            trust_remote_code=True
+        )
+        model = AutoModel.from_pretrained(
+            model_name,
+            torch_dtype=torch.float32,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True
+        )
+        # Image transform
+        IMAGENET_MEAN = (0.485, 0.456, 0.406)
+        IMAGENET_STD = (0.229, 0.224, 0.225)
+        transform = T.Compose([
+            T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
+            T.Resize((448, 448), interpolation=InterpolationMode.BICUBIC),
+            T.ToTensor(),
+            T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
+        ])
+        print("✅ Model loaded successfully!")
+        return True
+    except Exception as e:
+        print(f"❌ Error loading model: {e}")
+        return False
+def analyze_image(image):
+    if model is None:
+        return "❌ Model chưa được tải. Vui lòng chờ..."
+    try:
+        start_time = time.time()
+        # Preprocess image
+        if isinstance(image, str):
+            # Base64 image
+            if image.startswith('data:image'):
+                image = image.split(',')[1]
+            image_bytes = base64.b64decode(image)
+            image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
+        image_tensor = transform(image).unsqueeze(0).to(device)
+        with torch.no_grad():
+            query = "Mô tả chi tiết những gì bạn thấy trong hình ảnh này:"
+            description = model.chat(
+                tokenizer,
+                image_tensor,
+                query,
+                generation_config=dict(
+                    max_new_tokens=200,
+                    do_sample=True,
+                    temperature=0.7,
+                    top_p=0.9,
+                    repetition_penalty=1.1
+                )
+            )
+            # Get objects
+            try:
+                object_query = "Liệt kê các đối tượng chính:"
+                objects_text = model.chat(
+                    tokenizer,
+                    image_tensor,
+                    object_query,
+                    generation_config=dict(max_new_tokens=100, temperature=0.5)
+                )
+                objects = [obj.strip() for obj in objects_text.replace(',', ' ').split() if len(obj.strip()) > 2][:5]
+                objects_str = ", ".join(objects) if objects else "Không có"
+            except:
+                objects_str = "Không có"
+            processing_time = time.time() - start_time
+            return f"""
+**📝 Mô tả từ Vintern AI:**
+{description}
+**🔍 Đối tượng nhận diện:**
+{objects_str}
+**⚡ Thời gian xử lý:** {processing_time:.2f}s
+**🤖 Model:** Vintern-1B-v3.5 (Hugging Face Spaces)
+"""
+    except Exception as e:
+        return f"❌ Lỗi phân tích: {str(e)}"
+# Load the model once at start-up, before the UI is built.
+print("🚀 Initializing Vintern-1B-v3.5...")
+model_loaded = load_model()
+# Gradio interface
+with gr.Blocks(title="Vintern-1B-v3.5 Video Recognition") as demo:
+    # Page header and short description.
+    gr.Markdown("# 🎥 Vintern-1B-v3.5 - Nhận Diện Ảnh Tiếng Việt")
+    gr.Markdown("Upload ảnh để nhận diện nội dung bằng AI Vintern-1B-v3.5")
+    # load_model() returned False: add a warning banner to the page.
+    if not model_loaded:
+        gr.Markdown("⚠️ **Model đang được tải...** Vui lòng chờ vài phút.")
+    # One row, two columns: image upload and button first, result text second.
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(type="pil", label="📤 Upload Ảnh")
+            analyze_btn = gr.Button("🔍 Phân Tích", variant="primary")
+        with gr.Column():
+            result_output = gr.Textbox(label="📋 Kết Quả", lines=10, max_lines=15)
+    # Clicking the button runs analyze_image on the uploaded image and puts
+    # the returned text into the result box.
+    analyze_btn.click(
+        fn=analyze_image,
+        inputs=image_input,
+        outputs=result_output
+    )
+    # Usage instructions shown at the bottom of the page.
+    gr.Markdown("""
+    ---
+    **💡 Hướng dẫn:**
+    1. Upload ảnh từ máy tính hoặc webcam
+    2. Nhấn "Phân Tích" để nhận diện
+    3. Xem kết quả mô tả tiếng Việt
+    **🔗 API Endpoint:** Sử dụng URL của Space này trong trangchu.html
+    """)
+# Start the server only when run as a script: all interfaces, port 7860.
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)