Spaces:

therealsaed
/

Image_Captioning_model

Runtime error

App Files Files Community

therealsaed committed 7 days ago

Commit

24b38ad

verified ·

1 Parent(s): 983c21d

Update app.py

Browse files

Files changed (1) hide show

app.py +212 -67

app.py CHANGED Viewed

@@ -1,67 +1,212 @@
-"""
-Hugging Face Spaces App
-Deploy this to HF Spaces for free hosting
-"""
-import gradio as gr
-from transformers import BlipProcessor, BlipForConditionalGeneration
-from transformers import AutoProcessor, AutoModelForCausalLM
-from PIL import Image
-import torch
-# Load models
-print("Loading models...")
-blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-git_processor = AutoProcessor.from_pretrained("microsoft/git-base")
-git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-base")
-def generate_captions(image, true_caption=""):
-    """Generate captions using multiple models"""
-    if image is None:
-        return "Please upload an image first."
-    results = []
-    # BLIP model
-    try:
-        inputs = blip_processor(image, return_tensors="pt")
-        out = blip_model.generate(**inputs, max_length=50)
-        blip_caption = blip_processor.decode(out[0], skip_special_tokens=True)
-        results.append(f"**BLIP:** {blip_caption}")
-    except Exception as e:
-        results.append(f"**BLIP:** Error - {str(e)}")
-    # GIT model
-    try:
-        inputs = git_processor(images=image, return_tensors="pt")
-        generated_ids = git_model.generate(pixel_values=inputs.pixel_values, max_length=50)
-        git_caption = git_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        results.append(f"**GIT:** {git_caption}")
-    except Exception as e:
-        results.append(f"**GIT:** Error - {str(e)}")
-    if true_caption:
-        results.insert(0, f"**True Caption:** {true_caption}")
-    return "\n\n".join(results)
-# Create Gradio interface
-demo = gr.Interface(
-    fn=generate_captions,
-    inputs=[
-        gr.Image(type="pil", label="Upload Image"),
-        gr.Textbox(label="True Caption (Optional)", placeholder="Enter the correct caption for comparison")
-    ],
-    outputs=gr.Textbox(label="Generated Captions", lines=10),
-    title="🤖 AI Image Captioning",
-    description="Upload an image and get captions from multiple AI models!",
-    examples=[
-        ["https://huggingface.co/datasets/mishig/sample_images/resolve/main/cat.jpg", ""],
-        ["https://huggingface.co/datasets/mishig/sample_images/resolve/main/dog.jpg", ""],
-    ]
-)
-if __name__ == "__main__":
-    demo.launch()

+"""
+Hugging Face Spaces App - Image Captioning
+Deploy this to HF Spaces for free hosting
+"""
+import gradio as gr
+import torch
+from PIL import Image
+import time
+def load_models():
+    """Load models with error handling"""
+    models = {}
+    try:
+        from transformers import BlipProcessor, BlipForConditionalGeneration
+        print("Loading BLIP model...")
+        models['blip_processor'] = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+        models['blip_model'] = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+        print("✅ BLIP loaded successfully")
+    except Exception as e:
+        print(f"❌ BLIP failed to load: {e}")
+        models['blip_error'] = str(e)
+    try:
+        from transformers import AutoProcessor, AutoModelForCausalLM
+        print("Loading GIT model...")
+        models['git_processor'] = AutoProcessor.from_pretrained("microsoft/git-base")
+        models['git_model'] = AutoModelForCausalLM.from_pretrained("microsoft/git-base")
+        print("✅ GIT loaded successfully")
+    except Exception as e:
+        print(f"❌ GIT failed to load: {e}")
+        models['git_error'] = str(e)
+    return models
+# Load models at startup
+print("🚀 Loading AI models...")
+models = load_models()
+print(f"📦 Models loading completed")
+def generate_captions(image, true_caption=""):
+    """Generate captions using available models"""
+    if image is None:
+        return "❌ Please upload an image first."
+    # Ensure image is in RGB format
+    if image.mode != 'RGB':
+        image = image.convert('RGB')
+    results = []
+    start_time = time.time()
+    # Add true caption if provided
+    if true_caption.strip():
+        results.append(f"**🎯 True Caption:**")
+        results.append(f"{true_caption.strip()}")
+        results.append("")
+    # BLIP model
+    if 'blip_model' in models:
+        try:
+            blip_start = time.time()
+            inputs = models['blip_processor'](image, return_tensors="pt")
+            out = models['blip_model'].generate(**inputs, max_length=50, num_beams=5)
+            blip_caption = models['blip_processor'].decode(out[0], skip_special_tokens=True)
+            blip_time = time.time() - blip_start
+            results.append(f"**🤖 BLIP Model:** ({blip_time:.2f}s)")
+            results.append(f"{blip_caption}")
+            results.append("")
+        except Exception as e:
+            results.append(f"**🤖 BLIP Model:** Error - {str(e)}")
+            results.append("")
+    elif 'blip_error' in models:
+        results.append(f"**🤖 BLIP Model:** Not available - {models['blip_error']}")
+        results.append("")
+    # GIT model
+    if 'git_model' in models:
+        try:
+            git_start = time.time()
+            inputs = models['git_processor'](images=image, return_tensors="pt")
+            generated_ids = models['git_model'].generate(
+                pixel_values=inputs.pixel_values,
+                max_length=50,
+                num_beams=5
+            )
+            git_caption = models['git_processor'].batch_decode(generated_ids, skip_special_tokens=True)[0]
+            git_time = time.time() - git_start
+            results.append(f"**🧠 GIT Model:** ({git_time:.2f}s)")
+            results.append(f"{git_caption}")
+            results.append("")
+        except Exception as e:
+            results.append(f"**🧠 GIT Model:** Error - {str(e)}")
+            results.append("")
+    elif 'git_error' in models:
+        results.append(f"**🧠 GIT Model:** Not available - {models['git_error']}")
+        results.append("")
+    total_time = time.time() - start_time
+    results.append("---")
+    results.append(f"**⏱️ Total Processing Time:** {total_time:.2f} seconds")
+    results.append("")
+    results.append("**📊 About the Models:**")
+    results.append("• **BLIP**: Salesforce's Bootstrapping Language-Image Pre-training")
+    results.append("• **GIT**: Microsoft's Generative Image-to-text Transformer")
+    return "\n".join(results)
+# Build the Gradio UI: an intro blurb, a two-column row (inputs on the left,
+# text output on the right), a set of clickable examples, and closing notes.
+# Components are laid out in the order they are created inside the `with` blocks.
+with gr.Blocks(
+    title="AI Image Captioning",
+    theme=gr.themes.Soft(),
+    # Caps the app container width at 1200px.
+    css="""
+    .gradio-container {
+        max-width: 1200px !important;
+    }
+    """
+) as demo:
+    gr.Markdown("""
+    # 🤖 AI Image Captioning
+    Upload an image and get captions from multiple state-of-the-art AI models!
+    **Available Models:**
+    - 🤖 **BLIP** (Salesforce): Fast and accurate image captioning
+    - 🧠 **GIT** (Microsoft): Advanced generative image-to-text model
+    *Simply upload an image or try one of the examples below!*
+    """)
+    with gr.Row():
+        # Left column: image upload, optional reference caption, trigger button.
+        with gr.Column(scale=1):
+            # type="pil" so generate_captions can use image.mode / image.convert.
+            image_input = gr.Image(
+                type="pil",
+                label="📸 Upload Your Image",
+                height=400
+            )
+            true_caption_input = gr.Textbox(
+                label="🎯 True Caption (Optional)",
+                placeholder="Enter the correct caption to compare with AI predictions...",
+                lines=2
+            )
+            generate_btn = gr.Button(
+                "✨ Generate Captions",
+                variant="primary",
+                size="lg"
+            )
+        # Right column: the text report returned by generate_captions.
+        with gr.Column(scale=1):
+            output = gr.Textbox(
+                label="🤖 AI Generated Captions",
+                lines=20,
+                max_lines=25,
+                show_copy_button=True
+            )
+    # Example rows are [image URL, reference caption], in the same order as
+    # the `inputs` list passed to gr.Examples below.
+    gr.Markdown("### 📋 Try These Examples:")
+    example_images = [
+        ["https://huggingface.co/datasets/mishig/sample_images/resolve/main/cat.jpg", "A cat sitting on a surface"],
+        ["https://huggingface.co/datasets/mishig/sample_images/resolve/main/dog.jpg", "A dog in a field"],
+        ["https://images.unsplash.com/photo-1506905925346-21bda4d32df4?w=500", "A mountain landscape with snow"],
+        ["https://images.unsplash.com/photo-1549298916-b41d501d3772?w=500", "A red sports car"],
+        ["https://images.unsplash.com/photo-1551963831-b3b1ca40c98e?w=500", "A breakfast with coffee and pastries"],
+    ]
+    # NOTE(review): with cache_examples=False, clicking an example presumably
+    # only fills the inputs; captioning would then be triggered by the
+    # image_input.change handler below. Confirm against the Gradio version in use.
+    gr.Examples(
+        examples=example_images,
+        inputs=[image_input, true_caption_input],
+        outputs=output,
+        fn=generate_captions,
+        cache_examples=False
+    )
+    # Explicit trigger: run both models on the current image and caption text.
+    generate_btn.click(
+        fn=generate_captions,
+        inputs=[image_input, true_caption_input],
+        outputs=output
+    )
+    # Auto-generate whenever the image value changes. Because this uses the
+    # same function as the button, pressing the button after an upload runs
+    # the models a second time. Only the image and the button are wired up,
+    # so editing the caption text alone does not re-run anything.
+    image_input.change(
+        fn=generate_captions,
+        inputs=[image_input, true_caption_input],
+        outputs=output
+    )
+    gr.Markdown("""
+    ---
+    **🔧 Technical Details:**
+    - Models run on Hugging Face's infrastructure
+    - Processing time varies based on image size and complexity
+    - All models are open-source and publicly available
+    **📝 Tips:**
+    - Try different types of images (people, objects, landscapes, etc.)
+    - Compare the AI captions with your own description
+    - Larger images may take longer to process
+    """)
+# Start the Gradio server only when this file is executed as a script,
+# not when it is imported as a module.
+if __name__ == "__main__":
+    demo.launch()