mansari722 commited on
Commit
252ff3b
·
verified ·
1 Parent(s): 12d2cee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -0
app.py CHANGED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoProcessor, AutoModelForVision2Seq
3
+ from PIL import Image
4
+ import torch
5
+
6
+ # Load the model and processor
7
+ model_name = "ds4sd/SmolDocling-256M-preview"
8
+ processor = AutoProcessor.from_pretrained(model_name)
9
+ model = AutoModelForVision2Seq.from_pretrained(
10
+ model_name, torch_dtype=torch.bfloat16
11
+ ).to("cuda" if torch.cuda.is_available() else "cpu")
12
+
13
+ # Define the inference function
14
+ def process_image(image):
15
+ inputs = processor(images=image, return_tensors="pt").to(model.device)
16
+ outputs = model.generate(**inputs, max_new_tokens=1024)
17
+ result = processor.batch_decode(outputs, skip_special_tokens=True)[0]
18
+ return result
19
+
20
+ # Create the Gradio interface
21
+ iface = gr.Interface(
22
+ fn=process_image,
23
+ inputs=gr.inputs.Image(type="pil"),
24
+ outputs="text",
25
+ title="SmolDocling Document Conversion",
26
+ description="Upload an image of a document page to convert it to structured text."
27
+ )
28
+
29
+ if __name__ == "__main__":
30
+ iface.launch()