Staticaliza committed
Commit c60c480 · verified · 1 Parent(s): 560cd7d

Update app.py

Files changed (1)
  1. app.py (+16, -8)
app.py CHANGED
@@ -17,7 +17,7 @@ print(f"[SYSTEM] | Using {DEVICE} type compute device.")
 DEFAULT_INPUT = "Describe in one paragraph."
 MAX_FRAMES = 64
 
-repo_name = "openbmb/MiniCPM-o-2_6-int4" # "openbmb/MiniCPM-V-2_6-int4" # "openbmb/MiniCPM-V-2_6"
+repo_name = "openbmb/MiniCPM-o-2_6" # "openbmb/MiniCPM-V-2_6-int4" # "openbmb/MiniCPM-V-2_6"
 repo = AutoModel.from_pretrained(repo_name, trust_remote_code=True)
 tokenizer = AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True)
 
@@ -48,19 +48,28 @@ def encode_video(video_path):
     return frames
 
 @spaces.GPU(duration=60)
-def generate(image, video, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7, top_p=0.8, top_k=100, repetition_penalty=1.05, max_tokens=512):
+def generate(image, video, audio, instruction=DEFAULT_INPUT, sampling=False, temperature=0.7, top_p=0.8, top_k=100, repetition_penalty=1.05, max_tokens=512):
     # repo.to(DEVICE)
 
     print(image)
     print(video)
+    print(audio)
     print(instruction)
 
-    if not video:
+    if image is not None:
         image_data = Image.fromarray(image.astype('uint8'), 'RGB')
         inputs = [{"role": "user", "content": [image_data, instruction]}]
-    else:
+    elif video is not None:
         video_data = encode_video(video)
-        inputs = [{"role": "user", "content": video_data + [instruction]}]
+        inputs = [{"role": "user", "content": [video_data, instruction]}]
+    elif audio is not None:
+        if isinstance(audio, str):
+            audio_data, _ = librosa.load(audio, sr=16000, mono=True)
+        else:
+            audio_data = audio
+        inputs = [{"role": "user", "content": [audio_data, instruction]}]
+    else:
+        return "No input provided."
 
     parameters = {
         "sampling": sampling,
@@ -69,8 +78,6 @@ def generate(image, video, instruction=DEFAULT_INPUT, sampling=False, temperatur
         "top_k": top_k,
         "repetition_penalty": repetition_penalty,
         "max_new_tokens": max_tokens,
-        "use_image_id": False,
-        "max_slice_nums": 2,
     }
 
     output = repo.chat(image=None, msgs=inputs, tokenizer=tokenizer, **parameters)
@@ -90,6 +97,7 @@ with gr.Blocks(css=css) as main:
     with gr.Column():
         input = gr.Image(label="Image")
         input_2 = gr.Video(label="Video")
+        input_3 = gr.Audio(label="Audio"),
         instruction = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Instruction")
         sampling = gr.Checkbox(value=False, label="Sampling")
         temperature = gr.Slider(minimum=0.01, maximum=1.99, step=0.01, value=0.7, label="Temperature")
@@ -103,7 +111,7 @@ with gr.Blocks(css=css) as main:
     with gr.Column():
         output = gr.Textbox(lines=1, value="", label="Output")
 
-    submit.click(fn=generate, inputs=[input, input_2, instruction, sampling, temperature, top_p, top_k, repetition_penalty, max_tokens], outputs=[output], queue=False)
+    submit.click(fn=generate, inputs=[input, input_2, input_3, instruction, sampling, temperature, top_p, top_k, repetition_penalty, max_tokens], outputs=[output], queue=False)
     maintain.click(cloud, inputs=[], outputs=[], queue=False)
 
 main.launch(show_api=True)
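
For context, the snippet below is a minimal sketch (not part of the commit) of exercising the new audio branch outside Gradio. It mirrors the librosa preprocessing and the repo.chat call from app.py; the file path "sample.wav" and the standalone-script framing are assumptions, not part of the Space.

# Minimal sketch, assuming the same environment as app.py; "sample.wav" is a placeholder path.
import librosa
from transformers import AutoModel, AutoTokenizer

repo_name = "openbmb/MiniCPM-o-2_6"
repo = AutoModel.from_pretrained(repo_name, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True)

# Same preprocessing as the new elif-audio branch: 16 kHz mono waveform via librosa.
audio_data, _ = librosa.load("sample.wav", sr=16000, mono=True)
inputs = [{"role": "user", "content": [audio_data, "Describe in one paragraph."]}]

# Mirrors the repo.chat call in generate(), with the app's default sampling parameters.
output = repo.chat(
    image=None,
    msgs=inputs,
    tokenizer=tokenizer,
    sampling=False,
    temperature=0.7,
    top_p=0.8,
    top_k=100,
    repetition_penalty=1.05,
    max_new_tokens=512,
)
print(output)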