Spaces:
Running
Running
File size: 1,313 Bytes
9bcdecb b6538da 9bcdecb b6538da 9bcdecb b6538da 9bcdecb b6538da 9bcdecb b6538da 9bcdecb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
import gradio as gr
from transformers import AutoProcessor, AutoModelForImageTextToText
from PIL import Image
# The processor and the model come from the same checkpoint; both are created
# once at import time so the request handler can reuse them.
MODEL_ID = "ds4sd/SmolDocling-256M-preview"

processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForImageTextToText.from_pretrained(MODEL_ID)
def smoldocling_readimage(image, prompt_text):
    """Run the SmolDocling model on one image and return the decoded output.

    Args:
        image: PIL image coming from the Gradio image input, or ``None`` when
            the form was submitted without an upload.
        prompt_text: Instruction sent to the model together with the image.
            A blank or ``None`` prompt falls back to
            ``"Convert this page to docling."``.

    Returns:
        The generated text with the ``<end_of_utterance>`` marker and
        surrounding whitespace removed, or a short hint asking for an image
        when none was supplied.
    """
    if image is None:
        # Nothing to process; do not hand None to the processor.
        return "Please upload an image first."

    if not prompt_text or not prompt_text.strip():
        # An empty instruction gives the model nothing to act on.
        prompt_text = "Convert this page to docling."

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "image"},
                {"type": "text", "text": prompt_text},
            ],
        }
    ]
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=prompt, images=[image], return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=1024)

    # Drop the leading prompt tokens so only the newly generated part is decoded.
    prompt_length = inputs.input_ids.shape[1]
    generated = outputs[:, prompt_length:]

    # Special tokens are kept in the decoded text; only the end-of-utterance
    # marker is stripped afterwards.
    result = processor.batch_decode(generated, skip_special_tokens=False)[0]
    return result.replace("<end_of_utterance>", "").strip()
# Gradio UI: an image upload and a one-line prompt box feed the reader
# function; its return value is shown as plain text.
image_input = gr.Image(type="pil", label="Upload Image")
prompt_input = gr.Textbox(
    lines=1,
    placeholder="Enter prompt (e.g. Convert to docling)",
    label="Prompt",
)

demo = gr.Interface(
    fn=smoldocling_readimage,
    inputs=[image_input, prompt_input],
    outputs="text",
    title="SmolDocling Web App",
    description="Upload a document image and convert it to structured docling format.",
)

# Start the web server as soon as the interface is built.
demo.launch()
|