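"""Gradio playground for Microsoft's Florence-2 vision-language models.

The app loads the available Florence-2 checkpoints once at startup and lets
the user run object detection or image captioning on an uploaded image.
"""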
import gradio as gr
import supervision as sv
import torch
import spaces

from utils.annotate import annotate_with_boxes
from utils.models import load_models, run_inference, CHECKPOINTS
from utils.tasks import TASK_NAMES, TASKS, OBJECT_DETECTION_TASK_NAME, \
    CAPTION_TASK_NAMES

MARKDOWN = """
# Better Florence-2 Playground 🔥
<div>
    <a href="https://colab.research.google.com/github/roboflow-ai/notebooks/blob/main/notebooks/how-to-finetune-florence-2-on-detection-dataset.ipynb">
        <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Colab" style="display:inline-block;">
    </a>
    <a href="https://blog.roboflow.com/florence-2/">
        <img src="https://raw.githubusercontent.com/roboflow-ai/notebooks/main/assets/badges/roboflow-blogpost.svg" alt="Roboflow" style="display:inline-block;">
    </a>
    <a href="https://arxiv.org/abs/2311.06242">
        <img src="https://img.shields.io/badge/arXiv-2311.06242-b31b1b.svg" alt="arXiv" style="display:inline-block;">
    </a>
    <a href="https://www.youtube.com/watch?v=i3KjYgxNH6w">
        <img src="https://badges.aleen42.com/src/youtube.svg" alt="YouTube" style="display:inline-block;">
    </a>
</div>
"""

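# Example inputs for the gr.Examples blocks that are commented out at the
# bottom of this file: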
# OBJECT_DETECTION_EXAMPLES = [
#     ["microsoft/Florence-2-large-ft", "Object Detection", "https://media.roboflow.com/notebooks/examples/dog-2.jpeg"]
# ]
# CAPTION_EXAMPLES = [
#     ["microsoft/Florence-2-large-ft", "Caption", "https://media.roboflow.com/notebooks/examples/dog-2.jpeg"]
# ]

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MODELS, PROCESSORS = load_models(DEVICE)
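
# `load_models` (defined in utils/models.py, not shown here) is assumed to
# return two dicts keyed by checkpoint id, built roughly along these lines:
#
#   MODELS[checkpoint] = AutoModelForCausalLM.from_pretrained(
#       checkpoint, trust_remote_code=True).to(DEVICE).eval()
#   PROCESSORS[checkpoint] = AutoProcessor.from_pretrained(
#       checkpoint, trust_remote_code=True)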


@spaces.GPU
def process(checkpoint_dropdown, task_dropdown, image_input):
    """Run the selected Florence-2 checkpoint on the input image."""
    model = MODELS[checkpoint_dropdown]
    processor = PROCESSORS[checkpoint_dropdown]
    task = TASKS[task_dropdown]
    if task_dropdown == OBJECT_DETECTION_TASK_NAME:
        _, response = run_inference(
            model, processor, DEVICE, image_input, task)
        detections = sv.Detections.from_lmm(
            lmm=sv.LMM.FLORENCE_2, result=response, resolution_wh=image_input.size)
        return annotate_with_boxes(image_input, detections)
    elif task_dropdown in CAPTION_TASK_NAMES:
        _, response = run_inference(
            model, processor, DEVICE, image_input, task)
        return response[task]
    # Fail loudly instead of silently returning None for unknown tasks.
    raise gr.Error(f"Unsupported task: {task_dropdown}")

with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        checkpoint_dropdown_component = gr.Dropdown(
            choices=CHECKPOINTS,
            value=CHECKPOINTS[0],
            label="Model", info="Select a Florence 2 model to use.")
        task_dropdown_component = gr.Dropdown(
            choices=TASK_NAMES,
            value=TASK_NAMES[0],
            label="Task", info="Select a task to perform with the model.")

    with gr.Row():
        with gr.Column():
            image_input_component = gr.Image(type='pil', label='Image Input')
            submit_button_component = gr.Button(value='Submit', variant='primary')

        with gr.Column():
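            # @gr.render re-runs this function whenever the task dropdown
            # changes, rebuilding the output component (image vs. textbox)
            # and re-binding the submit button to it.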
            @gr.render(inputs=task_dropdown_component)
            def show_output(task_name):
                """Swap the output component to match the selected task."""
                if task_name == OBJECT_DETECTION_TASK_NAME:
                    image_output_component = gr.Image(type='pil', label='Image Output')
                    submit_button_component.click(
                        fn=process,
                        inputs=[
                            checkpoint_dropdown_component,
                            task_dropdown_component,
                            image_input_component
                        ],
                        outputs=image_output_component
                    )
                elif task_name in CAPTION_TASK_NAMES:
                    text_output_component = gr.Textbox(label='Caption Output')
                    submit_button_component.click(
                        fn=process,
                        inputs=[
                            checkpoint_dropdown_component,
                            task_dropdown_component,
                            image_input_component
                        ],
                        outputs=text_output_component
                    )

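    # Disabled: image_output_component / text_output_component only exist
    # inside the @gr.render closure above, so they are not in scope here.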
    # @gr.render(inputs=task_dropdown_component)
    # def show_examples(text):
    #     if text == "Object Detection":
    #         gr.Examples(
    #             fn=process,
    #             examples=OBJECT_DETECTION_EXAMPLES,
    #             inputs=[
    #                 checkpoint_dropdown_component,
    #                 task_dropdown_component,
    #                 image_input_component
    #             ],
    #             outputs=image_output_component
    #         )
    #     elif text == "Caption":
    #         gr.Examples(
    #             fn=process,
    #             examples=CAPTION_EXAMPLES,
    #             inputs=[
    #                 checkpoint_dropdown_component,
    #                 task_dropdown_component,
    #                 image_input_component
    #             ],
    #             outputs=text_output_component
    #         )

demo.launch(debug=False, show_error=True, max_threads=1)
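
# NOTE: `spaces.GPU` is a no-op outside of a Hugging Face ZeroGPU Space, so
# this script should also run locally with `python app.py`, provided the
# local `utils` package and model weights are available.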