Spaces:
Sleeping
Sleeping
File size: 1,098 Bytes
ddebdd0 d1b0574 ddebdd0 c65a1f7 ddebdd0 d1b0574 ea1edb1 ddebdd0 5e14c45 ea1edb1 ddebdd0 c50c7e8 ddebdd0 c50c7e8 ddebdd0 c65a1f7 bf55b92 ddebdd0 816d008 ddebdd0 c65a1f7 ddebdd0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
import spaces
import gradio as gr
from transformers import AutoModel, AutoProcessor
from PIL import Image
import torch
import numpy as np
# Load the VLV captioning model from the Hub (custom code, pinned revision)
# and place it on the GPU for inference.
model_name_or_path = "lyttt/VLV_captioner"
model = AutoModel.from_pretrained(
    model_name_or_path,
    revision="master",
    trust_remote_code=True,
    low_cpu_mem_usage=False,
).to("cuda")
def drop_incomplete_tail(text):
    """Trim a trailing unfinished sentence from generated text.

    The text is split on '.', each fragment stripped, and — when the
    original does not already end with a period — the final (incomplete)
    fragment is discarded.  Survivors are re-joined as 'S1. S2. ... .';
    an empty string is returned when nothing survives.
    """
    fragments = [part.strip() for part in text.split('.') if part.strip()]
    if not text.strip().endswith('.'):
        # Last fragment was cut off mid-sentence; drop it.
        fragments = fragments[:-1]
    if not fragments:
        return ''
    return '. '.join(fragments) + '.'
@spaces.GPU(duration=120)
def caption_image(image, max_length=77):
    """Generate a caption for a PIL image on the GPU.

    Args:
        image: a PIL.Image to caption.
        max_length: generation token budget; defaults to 77
            (presumably the CLIP text-context size — TODO confirm
            against the VLV_captioner model code).

    Returns:
        The generated caption string.
    """
    # Inference only — disable autograd bookkeeping.
    with torch.no_grad():
        outputs = model([image], max_length).generated_text[0]
    return outputs
def greet(image):
    """Gradio handler: numpy image array -> cleaned caption string.

    Converts the array Gradio passes for an "image" input into an RGB
    PIL image, captions it, and strips any unfinished trailing sentence.
    Returns '' when no image was provided.
    """
    # Gradio passes None when the user submits without uploading an image;
    # the original code crashed here with AttributeError on image.dtype.
    if image is None:
        return ""
    # Non-uint8 arrays are assumed to hold floats in [0, 1]; rescale.
    if image.dtype != np.uint8:
        image = (np.clip(image, 0, 1) * 255).astype(np.uint8)
    # Let PIL infer the source mode, then normalize to RGB so grayscale
    # or RGBA uploads don't raise (fromarray(mode='RGB') requires HxWx3).
    pil_image = Image.fromarray(image).convert('RGB')
    raw_text = caption_image(pil_image)
    return drop_incomplete_tail(raw_text)
# Minimal image -> caption UI.
# NOTE: removed a stray trailing "|" (web-scrape artifact) that made
# the original line a syntax error.
demo = gr.Interface(fn=greet, inputs="image", outputs="text")
demo.launch()