llava-4bit

Runtime error

App Files Files Community

whan12 committed on Aug 14, 2024

Commit

8ed6e93

verified ·

1 Parent(s): 56451ce

Update app.py

Browse files

Files changed (1) hide show

app.py +169 -83

app.py CHANGED Viewed

@@ -4,15 +4,19 @@ import copy
 import gradio as gr
 import PIL.Image
 import torch
-from transformers import BitsAndBytesConfig, pipeline, LlavaNextProcessor, LlavaNextForConditionalGeneration
 import re
 import time
 DESCRIPTION = "# LLaVA 💪 - THE IRON PUMPING MACHINE VISION BEAST"
 model_id = "llava-hf/llava-v1.6-vicuna-7b-hf"
-pipe = LlavaNextForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True)
 def extract_response_pairs(text):
     turns = re.split(r'(USER:|ASSISTANT:)', text)[1:]
@@ -24,72 +28,69 @@ def extract_response_pairs(text):
     return conv_list
-def add_text(history, text):
-    history = history + [[text, None]]
-    return history, text
-def infer(image, prompt, temperature, length_penalty, repetition_penalty, max_length, min_length, top_p):
-    outputs = pipe(images=image, prompt=prompt,
-                   generate_kwargs={"temperature": temperature,
-                                    "length_penalty": length_penalty,
-                                    "repetition_penalty": repetition_penalty,
-                                    "max_length": max_length,
-                                    "min_length": min_length,
-                                    "top_p": top_p})
-    inference_output = outputs[0]["generated_text"]
-    return inference_output
-def arnold_speak(text):
-    # Add Arnold Schwarzenegger-style phrases and modify speech
-    arnold_phrases = [
-        "Come with me if you want to lift!",
-        "I'll be back... after my protein shake.",
-        "Hasta la vista, baby weight!",
-        "Get to da choppa... I mean, da squat rack!",
-        "You lack discipline! But don't worry, I'm here to pump you up!"
-    ]
-    text = text.replace(".", "!")  # More enthusiastic punctuation
-    text = text.replace("gym", "iron paradise")
-    text = text.replace("exercise", "pump iron")
-    text = text.replace("workout", "sculpt your physique")
-    # Add random Arnold phrase to the end
-    text += " " + arnold_phrases[torch.randint(0, len(arnold_phrases), (1,)).item()]
-    return text
-def bot(history_chat, text_input, image, temperature, length_penalty, repetition_penalty, max_length, min_length, top_p, arnold_mode):
     if text_input == "":
         gr.Warning("Please input text")
-    if image is None:
         gr.Warning("Please input image or wait for image to be uploaded before clicking submit.")
-    chat_history = " ".join([item for sublist in history_chat for item in sublist])  # Flatten history
-    if arnold_mode:
-        system_prompt = "you are a bodybuilding coach, and you sound like Arnold Schwarzenegger. Give advice on gains, training, and inspire me at the end. Use Arnold's catchphrases and speaking style."
-    else:
-        system_prompt = "You are a helpful AI assistant. Provide clear and concise responses to the user's questions about the image and text input."
-    chat_history = f"{system_prompt}\n{chat_history}\nUSER: <image>\n{text_input}\nASSISTANT:"
-    inference_result = infer(image, chat_history, temperature, length_penalty, repetition_penalty, max_length, min_length, top_p)
     chat_val = extract_response_pairs(inference_result)
     chat_state_list = copy.deepcopy(chat_val)
-    chat_state_list[-1][1] = ""  # empty last response
-    response = chat_val[-1][1]
-    if arnold_mode:
-        response = arnold_speak(response)
-    for character in response:
         chat_state_list[-1][1] += character
         time.sleep(0.05)
         yield chat_state_list
 css = """
   #mkd {
     height: 500px;
@@ -97,52 +98,137 @@ css = """
     border: 1px solid #ccc;
   }
   """
-with gr.Blocks(css=css) as demo:
     gr.Markdown(DESCRIPTION)
     gr.Markdown("""## LLaVA, one of the greatest multimodal chat models is now available in Transformers with 4-bit quantization! ⚡️
     See the docs here: https://huggingface.co/docs/transformers/main/en/model_doc/llava.""")
     chatbot = gr.Chatbot(label="Chat", show_label=False)
     gr.Markdown("Input image and text and start chatting 👇")
     with gr.Row():
-        image = gr.Image(type="pil")
-        text_input = gr.Text(label="Chat Input", show_label=False, max_lines=3, container=False)
     history_chat = gr.State(value=[])
-    arnold_mode = gr.Checkbox(label="Arnold Schwarzenegger Mode", value=False)
     with gr.Accordion(label="Advanced settings", open=False):
-        temperature = gr.Slider(label="Temperature", info="Used with nucleus sampling.", minimum=0.5, maximum=1.0, step=0.1, value=1.0)
-        length_penalty = gr.Slider(label="Length Penalty", info="Set to larger for longer sequence, used with beam search.", minimum=-1.0, maximum=2.0, step=0.2, value=1.0)
-        repetition_penalty = gr.Slider(label="Repetition Penalty", info="Larger value prevents repetition.", minimum=1.0, maximum=5.0, step=0.5, value=1.5)
-        max_length = gr.Slider(label="Max Length", minimum=1, maximum=500, step=1, value=200)
-        min_length = gr.Slider(label="Minimum Length", minimum=1, maximum=100, step=1, value=1)
-        top_p = gr.Slider(label="Top P", info="Used with nucleus sampling.", minimum=0.5, maximum=1.0, step=0.1, value=0.9)
-    chat_inputs = [image, text_input, temperature, length_penalty, repetition_penalty, max_length, min_length, top_p, history_chat, arnold_mode]
     with gr.Row():
-        clear_chat_button = gr.Button("Clear")
-        cancel_btn = gr.Button("Stop Generation")
-        chat_button = gr.Button("Submit", variant="primary")
-    chat_event1 = chat_button.click(add_text, [chatbot, text_input], [chatbot, text_input]).then(
-        bot, chat_inputs, chatbot
-    )
-    chat_event2 = text_input.submit(add_text, [chatbot, text_input], [chatbot, text_input]).then(
-        bot, chat_inputs, chatbot
     )
-    clear_chat_button.click(lambda: ([], []), inputs=None, outputs=[chatbot, history_chat], queue=False, api_name="clear")
-    image.change(lambda: ([], []), inputs=None, outputs=[chatbot, history_chat], queue=False)
-    cancel_btn.click(None, [], [], cancels=[chat_event1, chat_event2])
-    examples = [
-        ["./examples/baklava.png", "How to make this pastry?"],
-        ["./examples/bee.png", "Describe this image."]
-    ]
-    gr.Examples(examples=examples, inputs=[image, text_input])
 if __name__ == "__main__":
     demo.queue(max_size=10).launch(debug=True)

 import gradio as gr
 import PIL.Image
 import torch
+from transformers import BitsAndBytesConfig, pipeline,LlavaNextProcessor, LlavaNextForConditionalGeneration
+import torch
 import re
 import time
 DESCRIPTION = "# LLaVA 💪 - THE IRON PUMPING MACHINE VISION BEAST"
 model_id = "llava-hf/llava-v1.6-vicuna-7b-hf"
+pipe =  LlavaNextForConditionalGeneration.from_pretrained(model_id , torch_dtype=torch.float16, low_cpu_mem_usage=True)
 def extract_response_pairs(text):
     turns = re.split(r'(USER:|ASSISTANT:)', text)[1:]
     return conv_list
+def add_text(history, text):
+  """Append a new, not-yet-answered user turn to the chat history.
+
+  Returns a tuple of the extended history and the unchanged ``text``. The new
+  turn is ``[text, None]``; the ``None`` slot is the reply still to be
+  generated.
+  """
+  # list.append mutates in place and returns None, so assigning its result
+  # would discard the history; build a new list instead.
+  history = (history or []) + [[text, None]]
+  return history, text
+def infer(image, prompt,
+            temperature,
+            length_penalty,
+            repetition_penalty,
+            max_length,
+            min_length,
+            top_p):
+  """Run the module-level ``pipe`` on one image and prompt and return the generated text.
+
+  The six sampling/length settings are forwarded unchanged to ``pipe`` inside
+  a single ``generate_kwargs`` dict. The return value is the
+  ``"generated_text"`` entry of the first element of ``pipe``'s output.
+  """
+  # NOTE(review): this call uses the image-to-text pipeline calling convention
+  # (images=, prompt=, generate_kwargs=), but ``pipe`` is bound at module level
+  # to LlavaNextForConditionalGeneration.from_pretrained(...) rather than to
+  # pipeline(...) -- confirm that object accepts this call.
+  outputs = pipe(images=image, prompt=prompt,
+                  generate_kwargs={"temperature":temperature,
+                  "length_penalty":length_penalty,
+                  "repetition_penalty":repetition_penalty,
+                  "max_length":max_length,
+                  "min_length":min_length,
+                  "top_p":top_p})
+  inference_output = outputs[0]["generated_text"]
+  return inference_output
+def bot(history_chat, text_input, image,
+            temperature,
+            length_penalty,
+            repetition_penalty,
+            max_length,
+            min_length,
+            top_p):
+    """Generate a reply for the latest user turn and stream it character by character.
+
+    Builds one prompt from the coaching instruction, the earlier chat turns and
+    ``text_input``, passes it with ``image`` and the sampling settings to
+    ``infer``, and parses the result with ``extract_response_pairs``. It then
+    yields the parsed conversation repeatedly while the last reply grows one
+    character at a time.
+
+    ``history_chat`` is treated as a sequence of ``[user, assistant]`` pairs
+    whose assistant slot may be ``None`` for a turn that has no reply yet.
+    """
     if text_input == "":
         gr.Warning("Please input text")
+    if image is None:
         gr.Warning("Please input image or wait for image to be uploaded before clicking submit.")
+    # Flatten the [user, assistant] pairs into one string. Joining the pairs
+    # directly raises TypeError, and unanswered turns hold None, so only the
+    # string entries are kept.
+    chat_history = " ".join(
+        message
+        for turn in (history_chat or [])
+        for message in turn
+        if isinstance(message, str)
+    )
+    chat_history = "you are a bodybuilding coach,and you sounds like arnold schwarzenegger, give advice on my gains, training and inspire me at the end"+chat_history + f"USER: <image>\n{text_input}\nASSISTANT:" # add text input for prompting
+    inference_result = infer(image, chat_history,
+            temperature,
+            length_penalty,
+            repetition_penalty,
+            max_length,
+            min_length,
+            top_p)
+    # return inference and parse for new history
     chat_val = extract_response_pairs(inference_result)
+    # create history list for yielding the last inference response
     chat_state_list = copy.deepcopy(chat_val)
+    chat_state_list[-1][1] = "" # empty last response
+    # add characters iteratively
+    for character in chat_val[-1][1]:
         chat_state_list[-1][1] += character
         time.sleep(0.05)
+        # yield history but with last response being streamed
         yield chat_state_list
 css = """
   #mkd {
     height: 500px;
     border: 1px solid #ccc;
   }
   """
+with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
     gr.Markdown("""## LLaVA, one of the greatest multimodal chat models is now available in Transformers with 4-bit quantization! ⚡️
     See the docs here: https://huggingface.co/docs/transformers/main/en/model_doc/llava.""")
     chatbot = gr.Chatbot(label="Chat", show_label=False)
     gr.Markdown("Input image and text and start chatting 👇")
     with gr.Row():
+      image = gr.Image(type="pil")
+      text_input = gr.Text(label="Chat Input", show_label=False, max_lines=3, container=False)
     history_chat = gr.State(value=[])
     with gr.Accordion(label="Advanced settings", open=False):
+        temperature = gr.Slider(
+            label="Temperature",
+            info="Used with nucleus sampling.",
+            minimum=0.5,
+            maximum=1.0,
+            step=0.1,
+            value=1.0,
+        )
+        length_penalty = gr.Slider(
+            label="Length Penalty",
+            info="Set to larger for longer sequence, used with beam search.",
+            minimum=-1.0,
+            maximum=2.0,
+            step=0.2,
+            value=1.0,
+        )
+        repetition_penalty = gr.Slider(
+            label="Repetition Penalty",
+            info="Larger value prevents repetition.",
+            minimum=1.0,
+            maximum=5.0,
+            step=0.5,
+            value=1.5,
+        )
+        max_length = gr.Slider(
+            label="Max Length",
+            minimum=1,
+            maximum=500,
+            step=1,
+            value=200,
+        )
+        min_length = gr.Slider(
+            label="Minimum Length",
+            minimum=1,
+            maximum=100,
+            step=1,
+            value=1,
+        )
+        top_p = gr.Slider(
+            label="Top P",
+            info="Used with nucleus sampling.",
+            minimum=0.5,
+            maximum=1.0,
+            step=0.1,
+            value=0.9,
+        )
+    chat_output = [
+        chatbot,
+        history_chat
+    ]
+    chat_inputs = [
+        image,
+        text_input,
+        temperature,
+        length_penalty,
+        repetition_penalty,
+        max_length,
+        min_length,
+        top_p,
+        history_chat
+    ]
     with gr.Row():
+      clear_chat_button = gr.Button("Clear")
+      cancel_btn = gr.Button("Stop Generation")
+      chat_button = gr.Button("Submit", variant="primary")
+    chat_event1 = chat_button.click(add_text, [chatbot, text_input], [chatbot, text_input]).then(bot, [chatbot, text_input,
+                                                                                           image, temperature,
+        length_penalty,
+        repetition_penalty,
+        max_length,
+        min_length,
+        top_p], chatbot)
+    chat_event2 = text_input.submit(
+        add_text,
+        [chatbot, text_input],
+        [chatbot, text_input]
+    ).then(
+        fn=bot,
+        inputs=[chatbot, text_input, image, temperature,
+        length_penalty,
+        repetition_penalty,
+        max_length,
+        min_length,
+        top_p],
+        outputs=chatbot
+    )
+    clear_chat_button.click(
+        fn=lambda: ([], []),
+        inputs=None,
+        outputs=[
+            chatbot,
+            history_chat
+        ],
+        queue=False,
+        api_name="clear",
     )
+    image.change(
+        fn=lambda: ([], []),
+        inputs=None,
+        outputs=[
+            chatbot,
+            history_chat
+        ],
+        queue=False)
+    cancel_btn.click(
+        None, [], [],
+        cancels=[chat_event1, chat_event2]
+    )
+    # Each example row supplies exactly two values (image path, prompt), so the
+    # inputs list must contain exactly the two matching components. chat_inputs
+    # is a plain Python list of components, not a component, and must not be
+    # nested here.
+    examples = [
+        ["./examples/baklava.png", "How to make this pastry?"],
+        ["./examples/bee.png", "Describe this image."],
+    ]
+    gr.Examples(examples=examples, inputs=[image, text_input])
 if __name__ == "__main__":
     demo.queue(max_size=10).launch(debug=True)