matteomarjanovic committed
Commit 166d9fd · Parent(s): 8749689

switch to groq for image description
Files changed (2):
  1. app.py (+29 -22)
  2. requirements.txt (+1 -1)
app.py CHANGED
```diff
@@ -6,7 +6,8 @@ import spaces #[uncomment to use ZeroGPU]
 from diffusers import DiffusionPipeline
 import torch
 import subprocess
-from transformers import IdeficsForVisionText2Text, AutoProcessor
+from groq import Groq
+import base64
 
 subprocess.run("rm -rf /data-nvme/zerogpu-offload/*", env={}, shell=True)
 
@@ -28,10 +29,9 @@ pipe.load_lora_weights(lora_path, weight_name=weigths_file)
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 
-# Load IDEFICS model for generate the prompt
-checkpoint = "HuggingFaceM4/idefics-9b"
-processor = AutoProcessor.from_pretrained(checkpoint)
-idefics_model = IdeficsForVisionText2Text.from_pretrained(checkpoint, torch_dtype=torch.bfloat16, device_map="auto")
+def encode_image(image_path):
+    with open(image_path, "rb") as image_file:
+        return base64.b64encode(image_file.read()).decode('utf-8')
 
 
 @spaces.GPU #[uncomment to use ZeroGPU]
@@ -68,22 +68,29 @@ def generate_description_fn(
     image,
     progress=gr.Progress(track_tqdm=True),
 ):
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-
-    prompt = [
-        "https://images.unsplash.com/photo-1583160247711-2191776b4b91?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3542&q=80",
-    ]
-
-    generator = torch.Generator().manual_seed(seed)
-
-    inputs = processor(prompt, return_tensors="pt").to("cuda")
-    bad_words_ids = processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
-
-    generated_ids = idefics_model.generate(**inputs, max_new_tokens=10, bad_words_ids=bad_words_ids)
-    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)
-
-    return generated_text[0]
+    base64_image = encode_image(image)
+
+    client = Groq()
+
+    chat_completion = client.chat.completions.create(
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "What's in this image?"},
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:image/jpeg;base64,{base64_image}",
+                        },
+                    },
+                ],
+            }
+        ],
+        model="llama-3.2-11b-vision-preview",
+    )
+
+    return chat_completion.choices[0].message.content
 
 
 examples = [
@@ -105,7 +112,7 @@ with gr.Blocks(css=css) as demo:
     with gr.Row():
         with gr.Column(elem_id="col-input-image"):
             gr.Markdown(" # Drop your image here")
-            input_image = gr.Image()
+            input_image = gr.Image(type="filepath")
             generate_button = gr.Button("Generate", scale=0, variant="primary")
             generated_prompt = gr.Markdown("")
         with gr.Column(elem_id="col-container"):
```
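The description path no longer runs a local VLM: `gr.Image(type="filepath")` makes Gradio hand the callback a path string, `encode_image` turns that file into a base64 data URL, and the request goes to Groq's chat completions API. Below is a minimal standalone sketch of that flow, assuming the `groq` SDK is installed and `GROQ_API_KEY` is set in the environment (`Groq()` reads it by default); the wrapper name `describe_image` and the `example.jpg` test file are hypothetical, and the model id is taken from the commit, so it may need updating if Groq retires the preview model:

```python
import base64

from groq import Groq  # pip install groq


def encode_image(image_path):
    # Read the file Gradio saved locally and base64-encode it for inline transport.
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")


def describe_image(image_path):
    # Groq() picks up GROQ_API_KEY from the environment; no key appears in code.
    client = Groq()
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What's in this image?"},
                    {
                        # Embed the image as a data URL instead of a public link,
                        # so the upload never has to be hosted anywhere.
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{encode_image(image_path)}"
                        },
                    },
                ],
            }
        ],
        model="llama-3.2-11b-vision-preview",  # vision model named in the commit
    )
    return chat_completion.choices[0].message.content


if __name__ == "__main__":
    print(describe_image("example.jpg"))  # hypothetical local test image
```

The data-URL approach trades request payload size for simplicity: the base64-encoded upload travels inside the API call itself, which suits a Space where user images are transient files.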
requirements.txt CHANGED
```diff
@@ -6,4 +6,4 @@ transformers
 xformers
 sentencepiece
 peft
-bitsandbytes
+groq
```
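With inference moved off-device, the only new runtime dependency is the `groq` SDK, while `bitsandbytes` drops out (presumably it was only needed for loading the local IDEFICS model). A quick pre-deploy sanity check, assuming the key is stored as the Space's `GROQ_API_KEY` secret and that the SDK's OpenAI-style `models.list()` surface is available:

```python
import os

from groq import Groq

# Fail fast if the Space secret isn't wired through to the environment.
assert os.environ.get("GROQ_API_KEY"), "set GROQ_API_KEY (e.g. as a Space secret)"

client = Groq()
print([m.id for m in client.models.list().data])  # confirms the key works
```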