therealsaed committed
Commit 2ea5846 · verified · 1 Parent(s): cb4b8cf

Upload 2 files

Files changed (2)
  1. app.py +67 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,67 @@
+ """
+ Hugging Face Spaces App
+ Deploy this to HF Spaces for free hosting
+ """
+
+ import gradio as gr
+ from transformers import BlipProcessor, BlipForConditionalGeneration
+ from transformers import AutoProcessor, AutoModelForCausalLM
+ from PIL import Image
+ import torch
+
+ # Load models
+ print("Loading models...")
+ blip_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
+ blip_model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
+
+ git_processor = AutoProcessor.from_pretrained("microsoft/git-base")
+ git_model = AutoModelForCausalLM.from_pretrained("microsoft/git-base")
+
+ def generate_captions(image, true_caption=""):
+     """Generate captions using multiple models"""
+     if image is None:
+         return "Please upload an image first."
+
+     results = []
+
+     # BLIP model
+     try:
+         inputs = blip_processor(image, return_tensors="pt")
+         out = blip_model.generate(**inputs, max_length=50)
+         blip_caption = blip_processor.decode(out[0], skip_special_tokens=True)
+         results.append(f"**BLIP:** {blip_caption}")
+     except Exception as e:
+         results.append(f"**BLIP:** Error - {str(e)}")
+
+     # GIT model
+     try:
+         inputs = git_processor(images=image, return_tensors="pt")
+         generated_ids = git_model.generate(pixel_values=inputs.pixel_values, max_length=50)
+         git_caption = git_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+         results.append(f"**GIT:** {git_caption}")
+     except Exception as e:
+         results.append(f"**GIT:** Error - {str(e)}")
+
+     if true_caption:
+         results.insert(0, f"**True Caption:** {true_caption}")
+
+     return "\n\n".join(results)
+
+ # Create Gradio interface
+ demo = gr.Interface(
+     fn=generate_captions,
+     inputs=[
+         gr.Image(type="pil", label="Upload Image"),
+         gr.Textbox(label="True Caption (Optional)", placeholder="Enter the correct caption for comparison")
+     ],
+     outputs=gr.Textbox(label="Generated Captions", lines=10),
+     title="🤖 AI Image Captioning",
+     description="Upload an image and get captions from multiple AI models!",
+     examples=[
+         ["https://huggingface.co/datasets/mishig/sample_images/resolve/main/cat.jpg", ""],
+         ["https://huggingface.co/datasets/mishig/sample_images/resolve/main/dog.jpg", ""],
+     ]
+ )
+
+ if __name__ == "__main__":
+     demo.launch()
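
Because the Space only exposes the Gradio UI, a quick way to exercise the captioning path before pushing is to call generate_captions directly in a session where app.py has already loaded the models. A minimal smoke-test sketch, not part of the commit; the example URL is one of the Gradio examples above, and the true caption is an illustrative placeholder:

# Minimal local smoke test (illustrative, not part of the commit):
# fetch one example image and print the formatted captions.
from io import BytesIO

import requests
from PIL import Image

url = "https://huggingface.co/datasets/mishig/sample_images/resolve/main/cat.jpg"
image = Image.open(BytesIO(requests.get(url, timeout=30).content)).convert("RGB")

# true_caption is a hypothetical reference string for the comparison output.
print(generate_captions(image, true_caption="a cat resting indoors"))
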
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ gradio
+ transformers
+ torch
+ Pillow==10.0.1
+ requests==2.31.0
+ numpy==1.24.3
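
The packages above must cover everything app.py imports (gradio, transformers, torch, PIL). A small sanity-check sketch, assuming it runs in the environment built from this requirements.txt:

# Verify that every top-level package app.py depends on is importable.
import importlib

for module in ("gradio", "transformers", "torch", "PIL", "requests", "numpy"):
    importlib.import_module(module)
print("All app.py dependencies import cleanly.")
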