# Imports
import gradio as gr
import spaces
import torch

from PIL import Image
from transformers import AutoModel, AutoTokenizer


# Pre-Initialize
DEVICE = "auto"
if DEVICE == "auto":
    DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"[SYSTEM] | Using {DEVICE} type compute device.")

# Variables
DEFAULT_INPUT = "Describe in one paragraph."

# Load MiniCPM-V-2_6 in bfloat16; trust_remote_code pulls in the model's custom modeling code
model = AutoModel.from_pretrained("openbmb/MiniCPM-V-2_6", torch_dtype=torch.bfloat16, trust_remote_code=True).to(DEVICE)
tokenizer = AutoTokenizer.from_pretrained("openbmb/MiniCPM-V-2_6", trust_remote_code=True)

model.eval()
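
# Narrow, centered layout; hide the default Gradio footer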
css = '''
.gradio-container{max-width: 560px !important}
h1{text-align:center}
footer {
    visibility: hidden
}
'''

# Functions
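# @spaces.GPU requests a ZeroGPU device for each call (up to 60 s) on Hugging Face Spaces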
@spaces.GPU(duration=60)
def generate(image, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7, top_p=0.8, top_k=100, repetition_penalty=1.05, max_tokens=512):
    # gr.Image is configured with type="filepath" below, so `image` arrives as a path on disk
    image_rgb = Image.open(image).convert("RGB")
    print(f"[GENERATE] | Instruction: {instruction}")

    inputs = [{"role": "user", "content": [image_rgb, instruction]}]

    parameters = {
        "sampling": sampling,
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        "repetition_penalty": repetition_penalty,
        "max_new_tokens": max_tokens
    }
    
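    # MiniCPM-V's chat API takes the message list directly; image=None since the image is embedded in msgs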
    output = model.chat(image=None, msgs=inputs, tokenizer=tokenizer, **parameters)
    
    return output
    
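# No-op handler for the ☁️ button, used to keep the Space active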
def cloud():
    print("[CLOUD] | Space maintained.")

# Initialize
with gr.Blocks(css=css) as main:
    with gr.Column():
        gr.Markdown("🪄 Analyze images and caption them using state-of-the-art openbmb/MiniCPM-V-2_6.")
        
    with gr.Column():
        image_input = gr.Image(type="filepath", label="Image")
        instruction = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Instruction")
        sampling = gr.Checkbox(value=False, label="Sampling")
        temperature = gr.Slider(minimum=0, maximum=2, step=0.01, value=0.7, label="Temperature")
        top_p = gr.Slider(minimum=0, maximum=1, step=0.01, value=0.8, label="Top P")
        top_k = gr.Slider(minimum=0, maximum=1000, step=1, value=100, label="Top K")
        repetition_penalty = gr.Slider(minimum=0, maximum=2, step=0.01, value=1.05, label="Repetition Penalty")
        max_tokens = gr.Slider(minimum=1, maximum=4096, step=1, value=512, label="Max Tokens")
        submit = gr.Button("▶")
        maintain = gr.Button("☁️")
        
    with gr.Column():
        output = gr.Textbox(lines=1, value="", label="Output")

    submit.click(fn=generate, inputs=[image_input, instruction, sampling, temperature, top_p, top_k, repetition_penalty, max_tokens], outputs=[output])  # queued so spaces.GPU can schedule the call
    maintain.click(cloud, inputs=[], outputs=[], queue=False)

main.launch(show_api=True)