Spaces:

retromarz
/

plavu_MA

Runtime error

App Files Files Community

retromarz committed on Jun 5

Commit

07a6bfd

verified ·

1 Parent(s): 2f010d1

simple Gradio app hosted on Hugging Face Spaces

Browse files

simple Gradio app hosted on Hugging Face Spaces (free version) that uses the fancyfeast/joy-caption-beta-one model for image captioning.

Files changed (1) hide show

app.py +54 -12

app.py CHANGED Viewed

@@ -1,13 +1,55 @@
-from gradio_client import Client, handle_file
-client = Client("fancyfeast/joy-caption-beta-one")
-result = client.predict(
-		input_image=handle_file('https://raw.githubusercontent.com/gradio-app/gradio/main/test/test_files/bus.png'),
-		prompt="Write a long detailed description for this image.",
-		temperature=0.6,
-		top_p=0.9,
-		max_new_tokens=512,
-		log_prompt=True,
-		api_name="/chat_joycaption"
 )
-print(result)

+import gradio as gr
+import torch
+from PIL import Image
+from transformers import LlavaForConditionalGeneration, AutoProcessor
+# Load the model and processor.
+# This runs at module import time, so the weights are fetched/loaded once
+# before the Gradio interface is built, not on every request.
+MODEL_PATH = "fancyfeast/llama-joycaption-beta-one-hf-llava"
+processor = AutoProcessor.from_pretrained(MODEL_PATH)
+# bfloat16 weights; device_map="auto" leaves device placement to the library.
+model = LlavaForConditionalGeneration.from_pretrained(MODEL_PATH, torch_dtype=torch.bfloat16, device_map="auto")
+# Put the module in evaluation mode; this app only runs inference.
+model.eval()
+# Define the captioning function
+def generate_caption(input_image: Image.Image, caption_type: str = "descriptive", caption_length: str = "medium") -> str:
+    if input_image is None:
+        return "Please upload an image."
+    # Prepare the prompt
+    prompt = f"Write a {caption_length} {caption_type} caption for this image."
+    convo = [
+        {
+            "role": "system",
+            "content": "You are a helpful assistant that generates accurate and relevant image captions."
+        },
+        {
+            "role": "user",
+            "content": prompt.strip()
+        }
+    ]
+    # Process the image and prompt
+    inputs = processor(images=input_image, text=convo[1]["content"], return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")
+    # Generate the caption
+    with torch.no_grad():
+        output = model.generate(**inputs, max_new_tokens=100, temperature=0.7, top_p=0.9)
+    # Decode the output
+    caption = processor.decode(output[0], skip_special_tokens=True)
+    return caption.strip()
+# Create the Gradio interface.
+# The three inputs are passed positionally to generate_caption in this order:
+# image, caption type, caption length. The dropdown defaults mirror the
+# function's own defaults ("descriptive", "medium").
+interface = gr.Interface(
+    fn=generate_caption,
+    inputs=[
+        gr.Image(label="Upload Image", type="pil"),
+        gr.Dropdown(choices=["descriptive", "casual", "social media"], label="Caption Type", value="descriptive"),
+        gr.Dropdown(choices=["short", "medium", "long"], label="Caption Length", value="medium")
+    ],
+    outputs=gr.Textbox(label="Generated Caption"),
+    title="Image Captioning with JoyCaption",
+    description="Upload an image to generate a caption using the fancyfeast/joy-caption-beta-one model."
 )
+# Only start the web server when this file is run as a script.
+if __name__ == "__main__":
+    interface.launch()