therealsaed committed
Commit 24b38ad · verified · 1 Parent(s): 983c21d

Update app.py

Files changed (1):
  1. app.py +212 -67
app.py CHANGED
@@ -1,67 +1,212 @@
- """
- Hugging Face Spaces App
- Deploy this to HF Spaces for free hosting
- """
-
- import gradio as gr
- from transformers import BlipProcessor, BlipForConditionalGeneration
- from transformers import AutoProcessor, AutoModelForCausalLM
- from PIL import Image
- import torch
-
- # Load models
- print("Loading models...")
- blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
- blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
-
- git_processor = AutoProcessor.from_pretrained("microsoft/git-base")
- git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-base")
-
- def generate_captions(image, true_caption=""):
-     """Generate captions using multiple models"""
-     if image is None:
-         return "Please upload an image first."
-
-     results = []
-
-     # BLIP model
-     try:
-         inputs = blip_processor(image, return_tensors="pt")
-         out = blip_model.generate(**inputs, max_length=50)
-         blip_caption = blip_processor.decode(out[0], skip_special_tokens=True)
-         results.append(f"**BLIP:** {blip_caption}")
-     except Exception as e:
-         results.append(f"**BLIP:** Error - {str(e)}")
-
-     # GIT model
-     try:
-         inputs = git_processor(images=image, return_tensors="pt")
-         generated_ids = git_model.generate(pixel_values=inputs.pixel_values, max_length=50)
-         git_caption = git_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-         results.append(f"**GIT:** {git_caption}")
-     except Exception as e:
-         results.append(f"**GIT:** Error - {str(e)}")
-
-     if true_caption:
-         results.insert(0, f"**True Caption:** {true_caption}")
-
-     return "\n\n".join(results)
-
- # Create Gradio interface
- demo = gr.Interface(
-     fn=generate_captions,
-     inputs=[
-         gr.Image(type="pil", label="Upload Image"),
-         gr.Textbox(label="True Caption (Optional)", placeholder="Enter the correct caption for comparison")
-     ],
-     outputs=gr.Textbox(label="Generated Captions", lines=10),
-     title="🤖 AI Image Captioning",
-     description="Upload an image and get captions from multiple AI models!",
-     examples=[
-         ["https://huggingface.co/datasets/mishig/sample_images/resolve/main/cat.jpg", ""],
-         ["https://huggingface.co/datasets/mishig/sample_images/resolve/main/dog.jpg", ""],
-     ]
- )
-
- if __name__ == "__main__":
-     demo.launch()
+ """
+ Hugging Face Spaces App - Image Captioning
+ Deploy this to HF Spaces for free hosting
+ """
+
+ import gradio as gr
+ import torch
+ from PIL import Image
+ import time
+
+ def load_models():
+     """Load models with error handling"""
+     models = {}
+
+     try:
+         from transformers import BlipProcessor, BlipForConditionalGeneration
+         print("Loading BLIP model...")
+         models['blip_processor'] = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+         models['blip_model'] = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+         print("✅ BLIP loaded successfully")
+     except Exception as e:
+         print(f"❌ BLIP failed to load: {e}")
+         models['blip_error'] = str(e)
+
+     try:
+         from transformers import AutoProcessor, AutoModelForCausalLM
+         print("Loading GIT model...")
+         models['git_processor'] = AutoProcessor.from_pretrained("microsoft/git-base")
+         models['git_model'] = AutoModelForCausalLM.from_pretrained("microsoft/git-base")
+         print("✅ GIT loaded successfully")
+     except Exception as e:
+         print(f"❌ GIT failed to load: {e}")
+         models['git_error'] = str(e)
+
+     return models
+
+ # Load models at startup
+ print("🚀 Loading AI models...")
+ models = load_models()
+ print(f"📦 Models loading completed")
+
+ def generate_captions(image, true_caption=""):
+     """Generate captions using available models"""
+     if image is None:
+         return "❌ Please upload an image first."
+
+     # Ensure image is in RGB format
+     if image.mode != 'RGB':
+         image = image.convert('RGB')
+
+     results = []
+     start_time = time.time()
+
+     # Add true caption if provided
+     if true_caption.strip():
+         results.append(f"**🎯 True Caption:**")
+         results.append(f"{true_caption.strip()}")
+         results.append("")
+
+     # BLIP model
+     if 'blip_model' in models:
+         try:
+             blip_start = time.time()
+             inputs = models['blip_processor'](image, return_tensors="pt")
+             out = models['blip_model'].generate(**inputs, max_length=50, num_beams=5)
+             blip_caption = models['blip_processor'].decode(out[0], skip_special_tokens=True)
+             blip_time = time.time() - blip_start
+
+             results.append(f"**🤖 BLIP Model:** ({blip_time:.2f}s)")
+             results.append(f"{blip_caption}")
+             results.append("")
+         except Exception as e:
+             results.append(f"**🤖 BLIP Model:** Error - {str(e)}")
+             results.append("")
+     elif 'blip_error' in models:
+         results.append(f"**🤖 BLIP Model:** Not available - {models['blip_error']}")
+         results.append("")
+
+     # GIT model
+     if 'git_model' in models:
+         try:
+             git_start = time.time()
+             inputs = models['git_processor'](images=image, return_tensors="pt")
+             generated_ids = models['git_model'].generate(
+                 pixel_values=inputs.pixel_values,
+                 max_length=50,
+                 num_beams=5
+             )
+             git_caption = models['git_processor'].batch_decode(generated_ids, skip_special_tokens=True)[0]
+             git_time = time.time() - git_start
+
+             results.append(f"**🧠 GIT Model:** ({git_time:.2f}s)")
+             results.append(f"{git_caption}")
+             results.append("")
+         except Exception as e:
+             results.append(f"**🧠 GIT Model:** Error - {str(e)}")
+             results.append("")
+     elif 'git_error' in models:
+         results.append(f"**🧠 GIT Model:** Not available - {models['git_error']}")
+         results.append("")
+
+     total_time = time.time() - start_time
+     results.append("---")
+     results.append(f"**⏱️ Total Processing Time:** {total_time:.2f} seconds")
+     results.append("")
+     results.append("**📊 About the Models:**")
+     results.append("• **BLIP**: Salesforce's Bootstrapping Language-Image Pre-training")
+     results.append("• **GIT**: Microsoft's Generative Image-to-text Transformer")
+
+     return "\n".join(results)
+
+ # Create Gradio interface
+ with gr.Blocks(
+     title="AI Image Captioning",
+     theme=gr.themes.Soft(),
+     css="""
+     .gradio-container {
+         max-width: 1200px !important;
+     }
+     """
+ ) as demo:
+
+     gr.Markdown("""
+     # 🤖 AI Image Captioning
+
+     Upload an image and get captions from multiple state-of-the-art AI models!
+
+     **Available Models:**
+     - 🤖 **BLIP** (Salesforce): Fast and accurate image captioning
+     - 🧠 **GIT** (Microsoft): Advanced generative image-to-text model
+
+     *Simply upload an image or try one of the examples below!*
+     """)
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             image_input = gr.Image(
+                 type="pil",
+                 label="📸 Upload Your Image",
+                 height=400
+             )
+
+             true_caption_input = gr.Textbox(
+                 label="🎯 True Caption (Optional)",
+                 placeholder="Enter the correct caption to compare with AI predictions...",
+                 lines=2
+             )
+
+             generate_btn = gr.Button(
+                 "✨ Generate Captions",
+                 variant="primary",
+                 size="lg"
+             )
+
+         with gr.Column(scale=1):
+             output = gr.Textbox(
+                 label="🤖 AI Generated Captions",
+                 lines=20,
+                 max_lines=25,
+                 show_copy_button=True
+             )
+
+     # Example images
+     gr.Markdown("### 📋 Try These Examples:")
+
+     example_images = [
+         ["https://huggingface.co/datasets/mishig/sample_images/resolve/main/cat.jpg", "A cat sitting on a surface"],
+         ["https://huggingface.co/datasets/mishig/sample_images/resolve/main/dog.jpg", "A dog in a field"],
+         ["https://images.unsplash.com/photo-1506905925346-21bda4d32df4?w=500", "A mountain landscape with snow"],
+         ["https://images.unsplash.com/photo-1549298916-b41d501d3772?w=500", "A red sports car"],
+         ["https://images.unsplash.com/photo-1551963831-b3b1ca40c98e?w=500", "A breakfast with coffee and pastries"],
+     ]
+
+     gr.Examples(
+         examples=example_images,
+         inputs=[image_input, true_caption_input],
+         outputs=output,
+         fn=generate_captions,
+         cache_examples=False
+     )
+
+     # Event handlers
+     generate_btn.click(
+         fn=generate_captions,
+         inputs=[image_input, true_caption_input],
+         outputs=output
+     )
+
+     # Auto-generate when image is uploaded
+     image_input.change(
+         fn=generate_captions,
+         inputs=[image_input, true_caption_input],
+         outputs=output
+     )
+
+     gr.Markdown("""
+     ---
+
+     **🔧 Technical Details:**
+     - Models run on Hugging Face's infrastructure
+     - Processing time varies based on image size and complexity
+     - All models are open-source and publicly available
+
+     **📝 Tips:**
+     - Try different types of images (people, objects, landscapes, etc.)
+     - Compare the AI captions with your own description
+     - Larger images may take longer to process
+     """)
+
+ # Launch the app
+ if __name__ == "__main__":
+     demo.launch()
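
Quick local check of the updated generate_captions function (a minimal usage sketch, not part of this commit; "example.jpg" is a placeholder path, and importing app loads both models at import time, which can take a while):

    # smoke_test.py - assumes the app.py from this commit is on the import path
    from PIL import Image
    from app import generate_captions  # model loading runs on import

    img = Image.open("example.jpg")  # any local test image
    print(generate_captions(img, true_caption="a cat sitting on a sofa"))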