Spaces:

leonarb
/

olmocr-demo

Running

App Files Community

leonarb committed on May 5

Commit

af75cff

verified ·

1 Parent(s): 845c4d4

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -36

app.py CHANGED Viewed

@@ -1,51 +1,45 @@
 import os
-# Set Hugging Face and Torch cache to a guaranteed-writable location
-cache_dir = "/tmp/huggingface_cache"
-os.environ["HF_HOME"] = cache_dir
-os.environ["TORCH_HOME"] = cache_dir
-# Create the directory if it doesn't exist
-os.makedirs(cache_dir, exist_ok=True)
-import gradio as gr
 import torch
-from PyPDF2 import PdfReader
-from io import BytesIO
 from PIL import Image
 from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
 from olmocr.data.renderpdf import render_pdf_to_base64png
 from olmocr.prompts import build_finetuning_prompt
 from olmocr.prompts.anchor import get_anchor_text
-from ebooklib import epub
-import base64
-import tempfile
-# Load model
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = Qwen2VLForConditionalGeneration.from_pretrained(
-    "allenai/olmOCR-7B-0225-preview", torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
 ).eval().to(device)
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
 def ocr_page(pdf_path, page_num):
-    # Render page to base64 PNG
     image_b64 = render_pdf_to_base64png(pdf_path, page_num + 1, target_longest_image_dim=1024)
     anchor_text = get_anchor_text(pdf_path, page_num + 1, pdf_engine="pdfreport", target_length=4000)
     prompt = build_finetuning_prompt(anchor_text)
-    messages = [
-        {
-            "role": "user",
-            "content": [
-                {"type": "text", "text": prompt},
-                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
-            ],
-        }
-    ]
     prompt_text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     main_image = Image.open(BytesIO(base64.b64decode(image_b64)))
@@ -65,7 +59,6 @@ def ocr_page(pdf_path, page_num):
     decoded = processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
     return decoded[0] if decoded else ""
 def convert_pdf_to_epub(pdf_file, title, author, language):
     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
         tmp_pdf.write(pdf_file.read())
@@ -74,18 +67,17 @@ def convert_pdf_to_epub(pdf_file, title, author, language):
     reader = PdfReader(tmp_pdf_path)
     num_pages = len(reader.pages)
-    # Create EPUB book
     book = epub.EpubBook()
     book.set_title(title)
     book.add_author(author)
     book.set_language(language)
-    # Use first page as cover
     cover_image_b64 = render_pdf_to_base64png(tmp_pdf_path, 1, target_longest_image_dim=1024)
     cover_image_bytes = base64.b64decode(cover_image_b64)
     book.set_cover("cover.jpg", cover_image_bytes)
-    # OCR and add pages
     for i in range(num_pages):
         text = ocr_page(tmp_pdf_path, i)
         chapter = epub.EpubHtml(title=f"Page {i+1}", file_name=f"page_{i+1}.xhtml", lang=language)
@@ -102,12 +94,10 @@ def convert_pdf_to_epub(pdf_file, title, author, language):
     with open(epub_path, "rb") as f:
         return epub_path, f.read()
 def interface_fn(pdf, title, author, language):
-    epub_path, epub_bytes = convert_pdf_to_epub(pdf, title, author, language)
     return epub_path
 demo = gr.Interface(
     fn=interface_fn,
     inputs=[
@@ -123,4 +113,4 @@ demo = gr.Interface(
 )
 if __name__ == "__main__":
-    demo.launch(share = True)

 import os
+import base64
+import tempfile
+from io import BytesIO
 import torch
+import gradio as gr
 from PIL import Image
+from PyPDF2 import PdfReader
+from ebooklib import epub
 from transformers import AutoProcessor, Qwen2VLForConditionalGeneration
 from olmocr.data.renderpdf import render_pdf_to_base64png
 from olmocr.prompts import build_finetuning_prompt
 from olmocr.prompts.anchor import get_anchor_text
+# Set Hugging Face and Torch cache to a guaranteed-writable location
+cache_dir = "/tmp/huggingface_cache"
+os.environ["HF_HOME"] = cache_dir
+os.environ["TORCH_HOME"] = cache_dir
+os.makedirs(cache_dir, exist_ok=True)
+# Load model and processor
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = Qwen2VLForConditionalGeneration.from_pretrained(
+    "allenai/olmOCR-7B-0225-preview",
+    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
 ).eval().to(device)
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")
 def ocr_page(pdf_path, page_num):
     image_b64 = render_pdf_to_base64png(pdf_path, page_num + 1, target_longest_image_dim=1024)
     anchor_text = get_anchor_text(pdf_path, page_num + 1, pdf_engine="pdfreport", target_length=4000)
     prompt = build_finetuning_prompt(anchor_text)
+    messages = [{
+        "role": "user",
+        "content": [
+            {"type": "text", "text": prompt},
+            {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{image_b64}"}}
+        ],
+    }]
     prompt_text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     main_image = Image.open(BytesIO(base64.b64decode(image_b64)))
     decoded = processor.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
     return decoded[0] if decoded else ""
 def convert_pdf_to_epub(pdf_file, title, author, language):
     with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_pdf:
         tmp_pdf.write(pdf_file.read())
     reader = PdfReader(tmp_pdf_path)
     num_pages = len(reader.pages)
     book = epub.EpubBook()
     book.set_title(title)
     book.add_author(author)
     book.set_language(language)
+    # Set cover from page 1
     cover_image_b64 = render_pdf_to_base64png(tmp_pdf_path, 1, target_longest_image_dim=1024)
     cover_image_bytes = base64.b64decode(cover_image_b64)
     book.set_cover("cover.jpg", cover_image_bytes)
+    # Add OCR'd pages as chapters
     for i in range(num_pages):
         text = ocr_page(tmp_pdf_path, i)
         chapter = epub.EpubHtml(title=f"Page {i+1}", file_name=f"page_{i+1}.xhtml", lang=language)
     with open(epub_path, "rb") as f:
         return epub_path, f.read()
 def interface_fn(pdf, title, author, language):
+    epub_path, _ = convert_pdf_to_epub(pdf, title, author, language)
     return epub_path
 demo = gr.Interface(
     fn=interface_fn,
     inputs=[
 )
 if __name__ == "__main__":
+    demo.launch(share=True)