suntao.0 committed on
Commit
04aed77
·
1 Parent(s): 074e02d
Files changed (13) hide show
  1. .gitignore +175 -0
  2. LICENSE +21 -0
  3. README.md +169 -5
  4. app.py +312 -0
  5. figure_detection.py +90 -0
  6. main.py +120 -0
  7. poster/__init__.py +0 -0
  8. poster/compress.py +36 -0
  9. poster/figures.py +73 -0
  10. poster/loader.py +27 -0
  11. poster/poster.py +730 -0
  12. requirements.txt +21 -0
  13. start.py +63 -0
.gitignore ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
175
+ .DS_Store
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 multimodal-art-projection
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,176 @@
1
  ---
2
- title: P2P
3
- emoji: 🏆
4
- colorFrom: pink
5
- colorTo: gray
6
  sdk: gradio
7
  sdk_version: 5.31.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: P2P Paper-to-Poster Generator
3
+ emoji: 🎓
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
  sdk_version: 5.31.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
+ # P2P: Automated Paper-to-Poster Generation and Fine-Grained Benchmark
14
+
15
+ [![](https://img.shields.io/badge/arXiv-2505.17104-b31b1b.svg?style=for-the-badge)](https://arxiv.org/abs/2505.17104)
16
+
17
+ [![Dataset - P2PInstruct](https://img.shields.io/badge/Dataset-P2PInstruct-blue)](https://huggingface.co/datasets/ASC8384/P2PInstruct)
18
+ [![Dataset - P2PEval](https://img.shields.io/badge/Dataset-P2PEval-blue)](https://huggingface.co/datasets/ASC8384/P2PEval)
19
+
20
+ ## 🚀 Try it on Hugging Face Spaces
21
+
22
+ This application is deployed on Hugging Face Spaces! You can try it directly in your browser without any installation:
23
+
24
+ **🎓 [Launch P2P Paper-to-Poster Generator](https://huggingface.co/spaces/ASC8384/P2P)**
25
+
26
+ ### Quick Start on Spaces:
27
+ 1. Upload your PDF research paper
28
+ 2. Enter your OpenAI API key and base URL (if using proxy)
29
+ 3. Input the AI model name (e.g., gpt-4o-mini, claude-3-sonnet)
30
+ 4. Configure the figure detection service URL
31
+ 5. Click "Generate Poster" and wait for processing
32
+ 6. Preview the generated poster and download JSON/HTML files
33
+
34
+ ⚠️ **Requirements**:
35
+ - Valid OpenAI API key with sufficient balance
36
+ - Figure detection service URL for extracting images from PDFs
37
+ - Compatible AI model (OpenAI, Claude, Gemini, etc.)
38
+
39
+ 💡 **Features**:
40
+ - Real-time HTML poster preview
41
+ - Direct JSON structure display
42
+ - Support for multiple AI models
43
+ - Flexible API configuration
44
+
45
+ ## Overview
46
+
47
+ P2P is an AI-powered tool that automatically converts academic research papers into professional conference posters. This repository contains the code for generating and evaluating these posters, leveraging large language models to extract key information and create visually appealing presentations.
48
+
49
+ The full research paper is available on [arXiv](https://arxiv.org/abs/2505.17104).
50
+
51
+ **Note:** Due to the large size of the evaluation and training datasets, only simple samples are included in this repository. The complete datasets are available on HuggingFace:
52
+ - [P2PInstruct](https://huggingface.co/datasets/ASC8384/P2PInstruct) - Training dataset
53
+ - [P2PEval](https://huggingface.co/datasets/ASC8384/P2PEval) - Benchmark dataset
54
+
55
+ ## Repository Structure
56
+
57
+ ### Core Files
58
+ - `main.py`: Main entry point for generating a poster from a single paper
59
+ - `start.py`: Batch processing script for generating posters from multiple papers
60
+ - `end.py`: Evaluation coordinator that processes generated posters
61
+ - `evalv2.py`: Core evaluation logic with metrics and comparison methods
62
+ - `figure_detection.py`: Utility for detecting and extracting figures from PDFs
63
+
64
+ ### Directories
65
+ - `poster/`: Core poster generation logic
66
+ - `poster.py`: Main poster generation implementation
67
+ - `figures.py`: Figure extraction and processing utilities
68
+ - `compress.py`: Image compression utilities
69
+ - `loader.py`: PDF loading utilities
70
+
71
+ - `eval/`: Evaluation tools and resources
72
+ - `eval_checklist.py`: Checklist-based evaluation implementation
73
+ - `predict_with_xgboost.py`: ML-based poster quality prediction
74
+ - `common.yaml`: Common evaluation parameters
75
+ - `xgboost_model.joblib`: Pre-trained evaluation model
76
+
77
+ ## Requirements
78
+
79
+ - Python 3.10+
80
+ - Dependencies listed in `requirements.txt`
81
+
82
+ ## Setup
83
+
84
+ Install dependencies:
85
+ ```bash
86
+ pip install -r requirements.txt
87
+ playwright install
88
+ ```
89
+
90
+ ## Usage
91
+
92
+ ### Generating a Single Poster
93
+
94
+ To generate a poster from a single paper:
95
+
96
+ ```bash
97
+ # Deploy figure_detection first
98
+ python main.py --url="URL_TO_PDF" --pdf="path/to/paper.pdf" --model="gpt-4o-mini" --output="output/poster.json"
99
+ ```
100
+
101
+ #### Parameters:
102
+ - `--url`: URL for PDF processing service (detecting and extracting figures)
103
+ - `--pdf`: Path to the local PDF file
104
+ - `--model`: LLM model to use (default: gpt-4o-mini)
105
+ - `--output`: Output file path (default: poster.json)
106
+
107
+ #### Output Files:
108
+ - `poster.json`: JSON representation of the poster
109
+ - `poster.html`: HTML version of the poster
110
+ - `poster.png`: PNG image of the poster
111
+
112
+ ### Batch Generating Posters
113
+
114
+ To generate posters for multiple papers:
115
+
116
+ 1. Organize your papers in a directory structure:
117
+ ```
118
+ eval/data/
119
+ └─ paper_id_1/
120
+ └─ paper.pdf
121
+ └─ paper_id_2/
122
+ └─ paper.pdf
123
+ ...
124
+ ```
125
+
126
+ 2. Edit `start.py` to configure:
127
+ - `url`: URL for PDF processing service
128
+ - `input_dir`: Directory containing papers (default: "eval/data")
129
+ - `models`: List of AI models to use for generation
130
+
131
+ 3. Run the batch generation script:
132
+ ```bash
133
+ python start.py
134
+ ```
135
+
136
+ Generated posters will be saved to:
137
+ ```
138
+ eval/temp-v2/{model_name}/{paper_id}/
139
+ └─ poster.json
140
+ └─ poster.html
141
+ └─ poster.png
142
+ ```
143
+
144
+ ### Evaluating Posters
145
+
146
+ To evaluate generated posters:
147
+
148
+ 1. Ensure reference materials exist:
149
+ ```
150
+ eval/data/{paper_id}/
151
+ └─ poster.png (reference poster)
152
+ └─ checklist.yaml (evaluation checklist)
153
+ ```
154
+
155
+ 2. Run the evaluation script:
156
+ ```bash
157
+ python end.py
158
+ ```
159
+
160
+ Evaluation results will be saved to `eval/temp-v2/results.jsonl`.
161
+
162
+ ## Citation
163
+
164
+ If you find our work useful, please consider citing P2P:
165
+
166
+ ```bibtex
167
+ @misc{sun2025p2pautomatedpapertopostergeneration,
168
+ title={P2P: Automated Paper-to-Poster Generation and Fine-Grained Benchmark},
169
+ author={Tao Sun and Enhao Pan and Zhengkai Yang and Kaixin Sui and Jiajun Shi and Xianfu Cheng and Tongliang Li and Wenhao Huang and Ge Zhang and Jian Yang and Zhoujun Li},
170
+ year={2025},
171
+ eprint={2505.17104},
172
+ archivePrefix={arXiv},
173
+ primaryClass={cs.CL},
174
+ url={https://arxiv.org/abs/2505.17104},
175
+ }
176
+ ```
app.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tempfile
3
+ import os
4
+ import json
5
+ import shutil
6
+ from pathlib import Path
7
+ import base64
8
+
9
+ from main import generate_paper_poster
10
+
11
+
12
def process_paper_to_poster(
    pdf_file,
    model_choice,
    figure_service_url,
    openai_api_key,
    openai_base_url
):
    """Generate a poster from an uploaded PDF, yielding live status updates.

    Args:
        pdf_file: Gradio file object for the uploaded paper PDF.
        model_choice: Name of the LLM to use (e.g. "gpt-4o-mini").
        figure_service_url: URL of the figure-detection web service.
        openai_api_key: OpenAI-compatible API key.
        openai_base_url: Optional base URL for proxies / compatible services.

    Yields:
        (download_files, json_preview, html_preview, status_message) tuples
        matching the Gradio outputs; intermediate yields carry only a status.
    """
    # Validate inputs early, with a user-facing message for each failure.
    if pdf_file is None:
        yield None, None, None, "❌ Please upload a PDF file first!"
        return

    if not openai_api_key.strip():
        yield None, None, None, "❌ Please enter your OpenAI API Key!"
        return

    if not figure_service_url.strip():
        yield None, None, None, "❌ Please enter the figure detection service URL!"
        return

    temp_dir = None
    try:
        yield None, None, None, "🚀 Starting poster generation process..."

        # Configure the OpenAI client via environment variables.
        # NOTE(review): env vars are process-wide, so concurrent Gradio
        # sessions share credentials — consider passing them explicitly.
        yield None, None, None, "⚙️ Configuring OpenAI API settings..."
        os.environ['OPENAI_API_KEY'] = openai_api_key.strip()
        if openai_base_url.strip():
            os.environ['OPENAI_BASE_URL'] = openai_base_url.strip()

        # Create a scratch workspace for this request.
        yield None, None, None, "📁 Creating temporary workspace..."
        temp_dir = tempfile.mkdtemp()

        # Copy the upload into the workspace under a stable name.
        yield None, None, None, "📄 Processing uploaded PDF file..."
        pdf_path = os.path.join(temp_dir, "paper.pdf")
        shutil.copy(pdf_file.name, pdf_path)

        yield None, None, None, "🔍 Extracting content from PDF and detecting figures..."

        # Delegate the heavy lifting to the CLI entry point in main.py.
        poster, html = generate_paper_poster(
            url=figure_service_url,
            pdf=pdf_path,
            vendor="openai",
            model=model_choice,
            text_prompt="",     # use the default prompt
            figures_prompt="",  # use the default prompt
            output=""           # unused by the web flow
        )

        yield None, None, None, "🖼️ Image processing completed! Generating JSON structure..."

        # Serialize the poster model for preview and download.
        json_content = json.dumps(poster.model_dump(), indent=2, ensure_ascii=False)

        yield None, None, None, "📋 JSON file generated successfully! Creating HTML poster..."

        # Persist results to named temp files so Gradio can offer downloads.
        json_file = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False, encoding='utf-8')
        html_file = tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False, encoding='utf-8')

        json_file.write(json_content)
        json_file.close()

        html_file.write(html)
        html_file.close()

        yield (
            [json_file.name, html_file.name],
            json_content,
            html,
            "✅ Poster generated successfully! 🎉\n📥 Files are ready for download\n🎨 HTML preview is displayed below\n💡 Download the HTML file for best viewing experience"
        )

    except Exception as e:
        error_msg = f"❌ Error occurred during processing: {str(e)}"
        yield None, None, None, error_msg
    finally:
        # BUG FIX: the workspace was previously removed only on success,
        # leaking one temp directory per failed request.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
102
+
103
+
104
+ # 创建Gradio界面
105
+ def create_interface():
106
+
107
+ # JavaScript代码强制启用Light模式
108
+ js_func = """
109
+ function refresh() {
110
+ const url = new URL(window.location);
111
+
112
+ if (url.searchParams.get('__theme') !== 'light') {
113
+ url.searchParams.set('__theme', 'light');
114
+ window.location.href = url.href;
115
+ }
116
+ }
117
+ """
118
+
119
+ with gr.Blocks(
120
+ title="P2P: Paper-to-Poster Generator",
121
+ theme=gr.themes.Default(), # 使用Light主题
122
+ js=js_func, # 添加JavaScript强制Light模式
123
+ css="""
124
+ .gradio-container {
125
+ max-width: 1600px !important;
126
+ }
127
+ .title {
128
+ text-align: center;
129
+ margin-bottom: 1rem;
130
+ }
131
+ .preview-container {
132
+ min-height: 1000px;
133
+ max-height: 1500px;
134
+ overflow-y: auto;
135
+ border: 1px solid #e0e0e0;
136
+ border-radius: 8px;
137
+ padding: 15px;
138
+ background-color: #fafafa;
139
+ }
140
+ .preview-container iframe {
141
+ width: 100% !important;
142
+ min-height: 1000px !important;
143
+ }
144
+ .config-section {
145
+ margin-bottom: 2rem;
146
+ }
147
+ .status-updating {
148
+ color: #2563eb;
149
+ font-weight: 500;
150
+ }
151
+ """
152
+ ) as demo:
153
+
154
+ gr.HTML("""
155
+ <div class="title">
156
+ <h1>🎓 P2P: Paper-to-Poster Generator</h1>
157
+ <p>Automatically convert academic papers into professional conference posters ✨</p>
158
+ <p><a href="https://arxiv.org/abs/2505.17104" target="_blank">📄 View Research Paper</a></p>
159
+ </div>
160
+ """)
161
+
162
+ # 配置区域 - 水平布局
163
+ with gr.Row(elem_classes=["config-section"]):
164
+ with gr.Column(scale=1):
165
+ gr.Markdown("### 📥 Input Configuration")
166
+
167
+ # 文件上传
168
+ pdf_input = gr.File(
169
+ label="Upload PDF Paper File",
170
+ file_types=[".pdf"],
171
+ file_count="single"
172
+ )
173
+
174
+ # OpenAI API配置
175
+ gr.Markdown("#### 🔑 OpenAI API Configuration")
176
+ openai_api_key = gr.Textbox(
177
+ label="OpenAI API Key",
178
+ placeholder="sk-...",
179
+ type="password",
180
+ info="Enter your OpenAI API key"
181
+ )
182
+
183
+ openai_base_url = gr.Textbox(
184
+ label="OpenAI Base URL (Optional)",
185
+ placeholder="https://api.openai.com/v1",
186
+ value="https://api.openai.com/v1",
187
+ info="Modify this URL if using proxy or other OpenAI-compatible services"
188
+ )
189
+
190
+ with gr.Column(scale=1):
191
+ gr.Markdown("### ⚙️ Model Configuration")
192
+
193
+ # 模型选择
194
+ model_choice = gr.Textbox(
195
+ label="AI Model Name",
196
+ value="gpt-4o-mini",
197
+ placeholder="e.g., gpt-4o-mini, gpt-4o, gpt-3.5-turbo, claude-3-sonnet",
198
+ info="Enter the AI model name you want to use"
199
+ )
200
+
201
+ # 图片检测服务URL
202
+ figure_url = gr.Textbox(
203
+ label="Figure Detection Service URL",
204
+ placeholder="Enter the URL of figure detection service",
205
+ info="Used to extract images and tables from PDF"
206
+ )
207
+
208
+ # 生成按钮
209
+ generate_btn = gr.Button(
210
+ "🚀 Generate Poster",
211
+ variant="primary",
212
+ size="lg"
213
+ )
214
+
215
+ with gr.Column(scale=1):
216
+ gr.Markdown("### 📤 Results & Downloads")
217
+
218
+ # 状态消息
219
+ status_msg = gr.Textbox(
220
+ label="Status Information",
221
+ interactive=False,
222
+ lines=4,
223
+ show_copy_button=True
224
+ )
225
+
226
+ # 文件下载
227
+ output_files = gr.File(
228
+ label="📥 Download Generated Files (JSON & HTML)",
229
+ file_count="multiple",
230
+ interactive=False,
231
+ show_label=True
232
+ )
233
+
234
+ # JSON预览 - 压缩到侧边栏
235
+ with gr.Accordion("📋 JSON Structure", open=False):
236
+ json_preview = gr.Code(
237
+ label="",
238
+ language="json",
239
+ lines=10,
240
+ show_label=False
241
+ )
242
+
243
+ # 预览区域 - 跨栏全宽显示
244
+ gr.Markdown("### 🎨 HTML Poster Preview")
245
+
246
+ gr.Markdown("**💡 Recommended: Download the HTML file from above and open it in your browser for optimal viewing experience**")
247
+
248
+ # HTML预览 - 全宽显示
249
+ html_preview = gr.HTML(
250
+ label="",
251
+ show_label=False,
252
+ elem_classes=["preview-container"]
253
+ )
254
+
255
+ # 使用说明
256
+ gr.Markdown("""
257
+ ### 📖 Usage Instructions
258
+
259
+ 1. **Upload PDF File**: Select the academic paper PDF you want to convert
260
+ 2. **Configure OpenAI API**: Enter your API Key and Base URL (if needed)
261
+ 3. **Select Model**: Enter model name manually, such as gpt-4o-mini, gpt-4o, claude-3-sonnet, etc.
262
+ 4. **Set Figure Service**: Enter the URL of the figure detection service
263
+ 5. **Generate Poster**: Click the generate button and wait for processing
264
+ 6. **Download Results**: Download the generated JSON and HTML files from the download section
265
+ 7. **Full Preview**: Download the HTML file and open it in your browser for the best viewing experience
266
+
267
+ ⚠️ **Important Notes**:
268
+ - Generated Poster is recommended to be viewed in fullscreen mode or download the HTML file to view in browser
269
+ - Requires a valid OpenAI API key with sufficient balance
270
+ - Figure detection service URL is required for extracting images from PDFs
271
+ - Processing time depends on paper length and complexity (usually 1-3 minutes)
272
+ - Ensure the model name is correct and supported by your API
273
+ - Download the HTML file and open it in your browser for the best viewing experience
274
+
275
+ 💡 **Tips**:
276
+ - Recommended to use gpt-4o-mini model for cost-effective testing
277
+ - Recommended to use Claude model for better performance
278
+ - Modify Base URL if using domestic proxy services
279
+ - Supports any OpenAI-compatible model names
280
+ - Can use Claude, Gemini and other models (requires corresponding API configuration)
281
+ - The HTML preview below shows how your poster will look with maximum width for better viewing
282
+ - Download the HTML file from the "Download Generated Files" section for standalone viewing
283
+ """)
284
+
285
+ # 绑定事件
286
+ generate_btn.click(
287
+ fn=process_paper_to_poster,
288
+ inputs=[
289
+ pdf_input,
290
+ model_choice,
291
+ figure_url,
292
+ openai_api_key,
293
+ openai_base_url
294
+ ],
295
+ outputs=[
296
+ output_files,
297
+ json_preview,
298
+ html_preview,
299
+ status_msg
300
+ ]
301
+ )
302
+
303
+ return demo
304
+
305
+
306
# Script entry point: build the UI and serve it.
if __name__ == "__main__":
    demo = create_interface()
    # Bind to all interfaces on 7860, the standard Hugging Face Spaces port.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )
figure_detection.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import modal
from io import BytesIO
from pathlib import Path
from fastapi import File, UploadFile, Form

# Container image: Debian slim + graphics system libraries + the
# DocLayout-YOLO / ultralytics stack needed by the detection model.
image = (
    modal.Image.debian_slim(python_version="3.10")
    .apt_install(["libgl1-mesa-glx", "libglib2.0-0"])  # install system libraries for graphics handling
    .pip_install(
        "ultralytics>=8.2.85",
        "doclayout-yolo==0.0.2",
        "huggingface-hub",
        "fastapi",
    )
)

# Persistent volume holding the downloaded layout-detection weights.
volume = modal.Volume.from_name("yolo-layout-detection", create_if_missing=True)
volume_path = Path("/root") / "data"
# Fine-tuned DocLayout-YOLO checkpoint inside the volume.
model_path = volume_path / "path2doclayout_yolo_ft.pt"

app = modal.App(
    "yolo-layout-detection-temp",
    image=image,
    volumes={volume_path: volume},
)
23
@app.function()
def download_model():
    """Download the pdf-extract-kit model weights into the shared volume."""
    from huggingface_hub import snapshot_download

    # Only files matching 'path2*' are fetched — the fine-tuned YOLO
    # weights referenced by model_path.
    snapshot_download(
        repo_id="opendatalab/pdf-extract-kit-1.0",
        local_dir=volume_path,
        allow_patterns='path2*',
        max_workers=20,
    )
32
@app.cls(gpu="a10g")
class LayoutDetection:
    """Modal web service wrapping a DocLayout-YOLO layout-detection model.

    Exposes one POST endpoint that takes a page image plus a task name and
    returns bounding boxes for figures/tables, optionally merged with their
    nearest captions into a single union box.
    """

    @modal.enter()
    def load_model(self):
        # Load the fine-tuned YOLOv10 weights from the shared volume once
        # per container start.
        from doclayout_yolo import YOLOv10
        self.model = YOLOv10(model_path)

    @modal.web_endpoint(method="POST", docs=True)
    async def predict(self, img: UploadFile = File(...), task: str = Form(...)):
        """Detect layout regions in the uploaded image for the given task.

        task is one of "figure", "table", "figurecaption", "tablecaption";
        the caption variants pair each detection with its nearest caption.
        Returns a list of {"box": [x1, y1, x2, y2], "score": float} dicts.
        """
        from PIL import Image
        img_bytes = await img.read()
        img = Image.open(BytesIO(img_bytes))
        results = self.model.predict(img)
        # Parse detections into target/caption candidates per result.
        figs = []
        for result in results:
            boxes = result.__dict__['boxes'].xyxy.cpu().tolist()
            classes = result.__dict__['boxes'].cls.cpu().tolist()
            scores = result.__dict__['boxes'].conf.cpu().tolist()
            targets, captions = [], []
            # NOTE(review): class ids inferred from usage — 3 appears to be
            # figure, 4 figure caption, 5 table, 6/7 table caption variants;
            # confirm against the model's label map.
            for box, cls, score in zip(boxes, classes, scores):
                if task == "figure":
                    if cls == 3:
                        targets.append({"box": box, "score": score})
                elif task == "table":
                    if cls == 5:
                        targets.append({"box": box, "score": score})
                elif task == "figurecaption":
                    if cls == 3:
                        targets.append({"box": box, "score": score})
                    elif cls == 4:
                        captions.append({"box": box, "score": score})
                elif task == "tablecaption":
                    if cls == 5:
                        targets.append({"box": box, "score": score})
                    elif cls == 6 or cls == 7:
                        captions.append({"box": box, "score": score})
            if not captions:
                # No captions detected (or a caption-less task): return the
                # raw target detections as-is.
                figs = targets
            else:
                # Greedily pair each target with its nearest caption, using
                # the L1 distance between the target's bottom-left corner
                # and the caption's top-left corner.
                matches = []
                for target in targets:
                    min_distance = float('inf')
                    for caption in captions:
                        target_box, caption_box = target["box"], caption["box"]
                        distance = abs(target_box[0] - caption_box[0]) + abs(target_box[3] - caption_box[1])
                        if distance < min_distance:
                            min_distance = distance
                            correct_match = (target, caption)
                    matches.append(correct_match)
                for target, caption in matches:
                    target_box, caption_box = target["box"], caption["box"]
                    # Union box covering both the target and its caption.
                    union_box = [
                        min(target_box[0], caption_box[0]),
                        min(target_box[1], caption_box[1]),
                        max(target_box[2], caption_box[2]),
                        max(target_box[3], caption_box[3]),
                    ]
                    # Matched pairs get a fixed score of 1.0.
                    figs.append({"box": union_box, "score": 1.0})
        return figs
main.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import copy
3
+ import json
4
+ import fire
5
+ import os
6
+ import pathlib
7
+
8
+ from poster.figures import extract_figures
9
+ from poster.poster import (
10
+ generate_html_v2,
11
+ generate_poster_v3,
12
+ replace_figures_in_poster,
13
+ replace_figures_size_in_poster,
14
+ take_screenshot,
15
+ )
16
+
17
+
18
def generate_paper_poster(
    url: str,
    pdf: str,
    vendor: str = "openai",
    model: str = "gpt-4o-mini",
    text_prompt: str = "",
    figures_prompt: str = "",
    output: str = "poster.json",
):
    """Generate a paper poster from a PDF.

    Args:
        url: URL of the figure-detection service.
        pdf: Local path of the PDF file.
        vendor: LLM vendor identifier, default "openai".
        model: Name of the model to use, default is gpt-4o-mini.
        text_prompt: Text prompt template ("" selects the default).
        figures_prompt: Figures prompt template ("" selects the default).
        output: Output file path, default is poster.json (currently unused;
            kept for backward compatibility with existing callers).

    Returns:
        (poster, html): the structured poster object and its rendered HTML.
    """
    # BUG FIX: str.replace(".pdf", "") stripped the substring anywhere in
    # the path (e.g. "a.pdf.dir/b.pdf"); removesuffix only strips a
    # trailing extension.
    pdf_stem = pdf.removesuffix(".pdf")
    figures_cache = f"{pdf_stem}_figures.json"
    figures_cap_cache = f"{pdf_stem}_figures_cap.json"

    figures = []
    figures_cap = []
    print("开始提取图片...")
    if os.path.exists(figures_cache) and os.path.exists(figures_cap_cache):
        # Reuse previously extracted figures/captions for this PDF.
        print(f"使用缓存的图片: {figures_cache}")
        with open(figures_cache, "r", encoding="utf-8") as f:
            figures = json.load(f)
        with open(figures_cap_cache, "r", encoding="utf-8") as f:
            figures_cap = json.load(f)
    else:
        figures_img = extract_figures(url, pdf, task="figure")
        figures_table = extract_figures(url, pdf, task="table")
        img_caption = extract_figures(url, pdf, task="figurecaption")
        table_caption = extract_figures(url, pdf, task="tablecaption")

        # Lower the confidence threshold until the number of kept figures
        # matches the number of kept captions.
        threshold = 0.85
        while True:
            figures = [
                image
                for image, score in figures_img + figures_table
                if score >= threshold
            ]
            figures_cap = [
                image
                for image, score in img_caption + table_caption
                if score >= threshold
            ]
            print(f"{threshold:.2f} 提取到 {len(figures)} / {len(figures_cap)} 张图像")
            if len(figures) == len(figures_cap):
                break
            # BUG FIX: when the counts can never match, the loop previously
            # decremented the threshold forever. Once threshold <= 0 every
            # detection is already included, so give up and proceed with
            # the mismatched counts.
            if threshold <= 0:
                break
            threshold -= 0.05

        # Cache results next to the PDF; ensure_ascii=False output needs an
        # explicit utf-8 encoding to be portable across platforms.
        with open(figures_cache, "w", encoding="utf-8") as f:
            json.dump(figures, f, ensure_ascii=False)
        with open(figures_cap_cache, "w", encoding="utf-8") as f:
            json.dump(figures_cap, f, ensure_ascii=False)

    # Retry generation until it succeeds; errors that retrying cannot fix
    # (policy violations, context-length limits, ...) are re-raised.
    while True:
        try:
            result = generate_poster_v3(
                vendor, model, text_prompt, figures_prompt, pdf, figures_cap, figures
            )

            poster = result["image_based_poster"]
            # Keep an untouched copy: replace_figures_in_poster mutates the
            # structure, and the sizing pass needs the original.
            backup_poster = copy.deepcopy(poster)

            poster = replace_figures_in_poster(poster, figures)

            poster_size = replace_figures_size_in_poster(backup_poster, figures)
            print("Now generating HTML...")
            result = generate_html_v2(vendor, model, poster_size, figures)

            html = result["html_with_figures"]

            return poster, html

        except Exception as e:
            # Fatal error classes are detected by message substring and
            # propagated; everything else is logged and retried.
            if (
                "content management policy" in str(e)
                or "message larger than max" in str(e)
                or "exceeds the maximum length" in str(e)
                or "maximum context length" in str(e)
                or "Input is too long" in str(e)
                or "image exceeds 5 MB" in str(e)
                or "too many total text bytes" in str(e)
                or "Range of input length" in str(e)
                or "Invalid text" in str(e)
            ):
                raise
            print(f"处理文件 {pdf} 时出错: {e}")
117
+
118
+
119
# CLI entry point: expose generate_paper_poster's arguments as flags.
if __name__ == "__main__":
    fire.Fire(generate_paper_poster)
poster/__init__.py ADDED
File without changes
poster/compress.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import io
3
+ from PIL import Image
4
+
5
+
6
def compress_image(base64_str, quality=85, max_size=(1024, 1024)):
    """Shrink a base64-encoded image, returning a base64-encoded PNG.

    The image is downscaled (preserving aspect ratio) whenever it exceeds
    ``max_size``, then re-encoded as an optimized PNG.

    Args:
        base64_str: Base64-encoded source image.
        quality: Kept for backward compatibility; PNG is lossless, so
            Pillow ignores a ``quality`` value for PNG output and this
            parameter has no effect.
        max_size: (width, height) upper bound for the output image.

    Returns:
        Base64-encoded compressed image, or the original string unchanged
        if anything goes wrong (deliberate best-effort behaviour).
    """
    try:
        img_data = base64.b64decode(base64_str)
        img = Image.open(io.BytesIO(img_data))

        # Downscale only when needed; thumbnail() keeps the aspect ratio.
        if img.width > max_size[0] or img.height > max_size[1]:
            img.thumbnail(max_size, Image.LANCZOS)

        output = io.BytesIO()
        # BUG FIX: the former `quality=quality` argument was silently
        # ignored by the PNG encoder and only misled readers;
        # optimize=True already selects maximum lossless compression.
        img.save(output, format="PNG", optimize=True)

        compressed_base64 = base64.b64encode(output.getvalue()).decode("utf-8")
        return compressed_base64
    except Exception as e:
        # Best-effort: never fail the pipeline over one bad image.
        print(f"图片压缩失败: {e}")
        return base64_str  # fall back to the original image on failure
poster/figures.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import requests
3
+ import os
4
+ from pathlib import Path
5
+
6
+ from io import BytesIO
7
+ from PIL import Image
8
+ from retry import retry
9
+
10
+ from .loader import ImagePDFLoader
11
+
12
+
13
@retry(tries=3)
def _extract_figures(
    url: str, img: Image.Image, task: str = "figure"
) -> list[tuple[Image.Image, float]]:
    """Send one page image to the detection endpoint and crop out detections.

    Returns (cropped PIL image, detection score) pairs. Network or HTTP
    failures raise and are retried up to 3 times by the decorator.
    """
    with BytesIO() as buffer:
        img.save(buffer, format="PNG")
        png_bytes = buffer.getvalue()

        response = requests.post(
            url,
            data={"task": task},
            files=[("img", ("image.png", png_bytes, "image/png"))],
        )
        response.raise_for_status()

        detections = response.json()

    return [(img.crop(item["box"]), item["score"]) for item in detections]
31
+
32
+
33
def extract_figures(
    url: str, pdf: str, task: str = "figure"
) -> list[tuple[str, float]]:
    """Render each PDF page, detect figures remotely, and return them encoded.

    Returns (base64-encoded PNG, detection score) pairs across all pages.
    """
    pages = ImagePDFLoader(pdf).load()

    detections = []
    for page_image in pages:
        detections.extend(_extract_figures(url, page_image, task))

    encoded = []
    for cropped, score in detections:
        with BytesIO() as buffer:
            cropped.save(buffer, format="PNG")
            encoded.append(
                (base64.b64encode(buffer.getvalue()).decode("utf-8"), score)
            )

    return encoded
52
+
53
+
54
+ if __name__ == "__main__":
55
+ url = ""
56
+ pdf = "1.pdf"
57
+
58
+ output_dir = Path("output")
59
+ output_dir.mkdir(exist_ok=True)
60
+
61
+ base64_figures = extract_figures(url, pdf, task="figurecaption")
62
+
63
+ print(f"提取到 {len(base64_figures)} 张图像")
64
+
65
+ for i, (b64_str, score) in enumerate(base64_figures):
66
+ img_data = base64.b64decode(b64_str)
67
+ img = Image.open(BytesIO(img_data))
68
+
69
+ output_path = output_dir / f"figure_{i + 1}.png"
70
+ img.save(output_path)
71
+ print(f"图像已保存到: {output_path}")
72
+
73
+ print(f"所有图像已保存到 {output_dir} 目录")
poster/loader.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz
2
+
3
+ from PIL import Image
4
+ from langchain_community.document_loaders import PyMuPDFLoader
5
+
6
+
7
class ImagePDFLoader(PyMuPDFLoader):
    """PDF loader that renders each page to a PIL image instead of text."""

    def load_pdf_page(self, page: fitz.Page, dpi: int) -> Image.Image:
        """Render one page at `dpi`; fall back to native scale if it is huge.

        The fallback re-render caps memory use for oversized pages
        (> 3000 px in either dimension at the requested DPI).
        """
        pix = page.get_pixmap(matrix=fitz.Matrix(dpi / 72, dpi / 72))
        image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

        if pix.width > 3000 or pix.height > 3000:
            # Re-render at 72 DPI (identity matrix) to keep the bitmap small.
            pix = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

        return image

    def load(self) -> list[Image.Image]:
        """Render every page of the PDF to an image at 250 DPI."""
        images = []

        # Close the document afterwards — the original leaked the file handle.
        doc = fitz.open(self.file_path)
        try:
            for i in range(len(doc)):
                page = doc[i]
                image = self.load_pdf_page(page, dpi=250)
                images.append(image)
        finally:
            doc.close()

        return images
poster/poster.py ADDED
@@ -0,0 +1,730 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import io
3
+ import json
4
+ import os
5
+ import re
6
+ import subprocess
7
+ import time
8
+ import cairosvg
9
+
10
+ from PIL import Image
11
+ from pdf2image import convert_from_path
12
+ from playwright.sync_api import sync_playwright
13
+ from pydantic import BaseModel, Field, create_model
14
+ from tqdm import tqdm
15
+
16
+ from langchain import hub
17
+
18
+ # from langchain_google_genai import ChatGoogleGenerativeAI
19
+ from langchain_openai import ChatOpenAI, AzureChatOpenAI
20
+ from langchain_openai.chat_models.base import BaseChatOpenAI
21
+
22
+ from langchain_community.document_loaders import PyMuPDFLoader
23
+ from langchain_core.prompts import (
24
+ HumanMessagePromptTemplate,
25
+ SystemMessagePromptTemplate,
26
+ ChatPromptTemplate,
27
+ MessagesPlaceholder,
28
+ PromptTemplate,
29
+ )
30
+ from langchain_core.prompts.image import ImagePromptTemplate
31
+ from langchain_core.output_parsers import PydanticOutputParser
32
+ from langchain_core.exceptions import OutputParserException
33
+ from langchain.output_parsers import OutputFixingParser
34
+ from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
35
+
36
+
37
def create_dynamic_poster_model(sections: dict[str, str]) -> type[BaseModel]:
    """Build a pydantic model with fixed header fields plus one string field
    per section returned by the LLM (field name = section title)."""
    base_fields = {
        "title": (str, Field(default="", description="Title of the paper")),
        "authors": (str, Field(default="", description="Authors of the paper")),
        "affiliation": (
            str,
            Field(default="", description="Affiliation of the authors"),
        ),
    }

    section_fields = {
        name: (str, Field(default="", description=description))
        for name, description in sections.items()
    }

    return create_model("DynamicPoster", **base_fields, **section_fields)
52
+
53
+
54
def remove_think_tags(llm_output):
    """Strip <think>...</think> reasoning blocks from LLM output.

    Handles both message objects (anything with a ``.content`` attribute,
    returned re-wrapped as an AIMessage) and plain strings. An unterminated
    trailing ``<think>`` block is also removed. Other types pass through.
    """

    def _strip(text):
        cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
        return re.sub(r"<think>.*", "", cleaned, flags=re.DOTALL)

    if hasattr(llm_output, "content"):
        return AIMessage(content=_strip(llm_output.content))
    if isinstance(llm_output, str):
        return _strip(llm_output)
    return llm_output
65
+
66
+
67
def replace_figures_in_markdown(
    markdown: str,
    figures: list[str],
) -> str:
    """Substitute numeric Markdown image placeholders ``![alt](idx)`` with the
    corresponding entry from ``figures``; out-of-range indices are left alone."""

    def _swap(match):
        idx = int(match.group(2))
        if not 0 <= idx < len(figures):
            return match.group(0)
        return f"![{match.group(1)}]({figures[idx]})"

    return re.sub(r"!\[(.*?)\]\((\d+)\)", _swap, markdown)
80
+
81
+
82
def replace_figures_in_poster(
    poster: BaseModel,
    figures: list[str],
) -> BaseModel:
    """Apply Markdown figure substitution to every string field of the poster
    model. Mutates ``poster`` in place and returns it."""
    for name in poster.model_fields:
        if not hasattr(poster, name):
            continue
        current = getattr(poster, name)
        if isinstance(current, str):
            setattr(poster, name, replace_figures_in_markdown(current, figures))
    return poster
92
+
93
+
94
def replace_figures_size_in_markdown(
    markdown: str,
    figures: list[str],
) -> str:
    """Annotate numeric figure placeholders with the decoded image's pixel
    width, height and aspect ratio (keeping the numeric index), so a layout
    LLM can reason about image sizes. Out-of-range indices are untouched."""

    def _annotate(match):
        idx = int(match.group(2))
        if not 0 <= idx < len(figures):
            return match.group(0)
        raw = base64.b64decode(figures[idx])
        width, height = Image.open(io.BytesIO(raw)).size
        return (
            f"![{match.group(1)}, width = {width}, height = {height}, "
            f"aspect ratio = {width / height:.4f}]({match.group(2)})"
        )

    return re.sub(r"!\[(.*?)\]\((\d+)\)", _annotate, markdown)
110
+
111
+
112
def replace_figures_size_in_poster(
    poster: BaseModel,
    figures: list[str],
) -> BaseModel:
    """Annotate figure placeholders in every string field of the poster model
    with pixel-size metadata. Mutates ``poster`` in place and returns it."""
    for name in poster.model_fields:
        if not hasattr(poster, name):
            continue
        current = getattr(poster, name)
        if isinstance(current, str):
            setattr(
                poster, name, replace_figures_size_in_markdown(current, figures)
            )
    return poster
122
+
123
+
124
def replace_figures_in_html(html: str, figures: list[str]) -> str:
    """Replace numeric ``src="idx"`` attributes with base64 PNG data URLs."""

    def _swap(match):
        idx = int(match.group(1))
        if not 0 <= idx < len(figures):
            return match.group(0)
        return f'src="data:image/png;base64,{figures[idx]}"'

    return re.sub(r"src=\"(\d+)\"", _swap, html)
134
+
135
+
136
def get_sizes(type: str, html: str) -> list[list[dict]]:
    """Render `html` headlessly and measure the column layout.

    For each ``.{type}-content`` element, returns a list of groups; each
    group is a list of ``{"width", "height"}`` bounding boxes — one per
    ``.{type}-column`` for flex groups, or a single box for a plain child.
    `type` is "poster" or "section" at the call sites in this module.

    Fix over the original: the browser is now closed in a ``finally`` block,
    so a failing Playwright call no longer leaks the Chromium process.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.set_content(html)

            contents = page.query_selector_all(f".{type}-content")
            content_sizes = []

            for content in contents:
                groups = content.query_selector_all("> *")
                group_sizes = []

                for group in groups:
                    is_group = group.evaluate(
                        f"element => element.classList.contains('{type}-group')"
                    )
                    if not is_group:
                        # Plain child: record it as a one-column pseudo-group.
                        bounding_box = group.bounding_box()
                        group_sizes.append(
                            [
                                {
                                    "width": bounding_box["width"],
                                    "height": bounding_box["height"],
                                }
                            ]
                        )
                        continue

                    # Top-align columns so each column's own height is measured,
                    # not the stretched flex height.
                    group.evaluate("(element) => element.style.alignItems = 'start'")

                    columns = group.query_selector_all(f".{type}-column")
                    column_sizes = []

                    for column in columns:
                        bounding_box = column.bounding_box()
                        column_sizes.append(
                            {
                                "width": bounding_box["width"],
                                "height": bounding_box["height"],
                            }
                        )

                    group_sizes.append(column_sizes)

                content_sizes.append(group_sizes)
        finally:
            browser.close()
        return content_sizes
186
+
187
+
188
def generate_html_v2(vendor: str, model: str, poster: BaseModel, figures: list[str]):
    """Generate poster HTML from a filled poster model via an LLM layout loop.

    The LLM produces a <body>; the result is rendered headlessly and its
    blank-space proportion measured. If more than 10% of the poster is blank,
    the previous body plus measured column sizes are fed back to the LLM for
    up to 5 attempts.

    Returns a dict with "html" (numeric figure indices) and
    "html_with_figures" (data-URL-inlined figures).
    Raises ValueError if the layout never reaches <= 10% blank space.

    NOTE(review): `llm` is only assigned when vendor == "openai"; any other
    vendor raises NameError below — confirm intended vendor set.
    """
    if vendor == "openai":
        # Reasoning models (o1/o3/o4) go through ChatOpenAI; everything else
        # through the BaseChatOpenAI wrapper.
        if "o1" in model or "o3" in model or "o4" in model:
            llm = ChatOpenAI(
                model=model,
                temperature=1,
                max_tokens=8000,
            )
        else:
            llm = BaseChatOpenAI(
                model=model,
                temperature=1,
                max_tokens=8000,
                # model_kwargs={
                #     "extra_body": {"chat_template_kwargs": {"enable_thinking": False}}
                # },
            )

    # Fixed CSS shared between the prompt (so the LLM can compute sizes) and
    # the final rendered document.
    style = """<style>
html {
  font-family: "Times New Roman", Times, serif;
  font-size: 16px;
}

body {
  width: 1280px;
  margin: 0;
}

ol,
ul {
  margin-left: 0.5rem;
}

li {
  margin-bottom: 0.5rem;
}

img {
  width: calc(100% - 2rem);
  margin: 0.5rem 1rem;
}

.poster-header {
  padding: 2rem;
  text-align: center;
}

.poster-title {
  margin-bottom: 1rem;
  font-size: 1.875rem;
  font-weight: bold;
}

.poster-author {
  margin-bottom: 0.5rem;
}

.poster-content {
  padding: 1rem;
}

.section {
  margin-bottom: 1rem;
}

.section-title {
  padding: 0.5rem 1rem;
  font-weight: bold;
}

.section-content {
  margin: 0 1rem;
}
</style>
"""

    layout_prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessage(
                content="You are a professional academic poster web page creator and your task is to generate the HTML code for a nicely laid out academic poster web page based on the object provided."
            ),
            HumanMessagePromptTemplate.from_template(
                """# Object Description
- The object contains several fields. Each field represents a section, except for the title, author and affiliation fields. The field name is the title of the section and the field value is the Markdown content of the section.
- The image in Markdown is given in the format ![alt_text, width = original_width, height = original_height, aspect ratio = aspect_ratio](image_index).

# HTML Structure
- Only generate the HTML code inside <body>, without any other things.
- Place title, author and affiliation inside <div class="poster-header">. Place title inside <div class="poster-title">, author inside <div class="poster-author"> and affiliation inside <div class="poster-affiliation">.
- Place content inside <div class="poster-content">.
- Place each section inside <div class="section">. Place section title inside <div class="section-title"> and section content inside <div class="section-content">.
- Use <p> for paragraphs.
- Use <ol> and <li> for ordered lists, and <ul> and <li> for unordered lists.
- Use <img src="image_index" alt="alt_text"> for images.
- Use <strong> for bold text and <em> for italic text.
- Do not use tags other than <div>, <p>, <ol>, <ul>, <li>, <img>, <strong>, <em>.
- Do not create any sections that are not in the object. Do not split or merge any existing sections.
- Sections and contents should be strictly equal to the object, and should be placed strictly in the order of the object.

# Color Specification
- Select at least 2 colors from the visual identity of the affiliation. If there are multiple affiliations, consider the most well-known one.
- For example, Tsinghua University uses #660874 and #d93379, Beihang University uses #005bac and #003da6, Zhejiang University uses #003f88 and #b01f24. These are just examples, you must pick colors from the actual visual identity of the affiliation.
- Add text and background color to poster header and section title using inline style. Use gradient to make the poster more beautiful.
- The text and background color of each section title should be the same.
- Do not add styles other than color, background, border, box-shadow.
- Do not add styles like width, height, padding, margin, font-size, font-weight, border-radius.

# Layout Specification
- Optionally, inside <div class="poster-content">, group sections into columns using <div class="poster-group" style="display: flex; gap: 1rem"> and <div class="poster-column" style="flex: 1">.
- You must determine the optimal number and flex grow value of columns to create a balanced poster layout. If one column becomes too tall, redistribute sections to other columns.
- There can be multiple groups with different number and flex grow of columns.
- Optionally, inside <div class="section-content">, group texts and images into columns using <div class="section-group" style="display: flex; gap: 0.5rem"> and <div class="section-column" style="flex: 1">.
- For example, if there are two images in two columns whose aspect ratios are 1.2 and 2 respectively, the flex grow of two columns should be 1.2 and 2 respectively, to make the columns have the same height.
- Calculate the size of each image based on column width and aspect ratios. Add comment <!-- width = display_width, height = display_height --> before each image.
- Rearrange the structure and order of sections, texts and images to make the height of each column in the same group approximately the same.
- For example, if there are too many images in one section that make the height of the column too large, group the images into columns.
- The display width of each image should not be too large or too small compared to its original width.
- DO NOT LEAVE MORE THAN 5% BLANK SPACE IN THE POSTER.
- Use a 3-column or 4-column layout with a landscape (horizontal) orientation for optimal visual presentation.

# Output Requirement
- Please output the result in the following format:
<think>
Think step by step, considering all structures and specifications listed above one by one.
Calculate the width and height of each column, text and image in detail, based on given style.
</think>
```html
HTML code inside <body>.
```
- Please make the content in <think> as detailed and comprehensive as possible.

# Existing Style
{style}

# Object
{poster}
"""
            ),
        ]
    )
    layout_chain = layout_prompt | llm
    # First attempt, without any feedback.
    output = layout_chain.invoke({"style": style, "poster": poster}).content
    # A placeholder is appended so subsequent retries can inject the
    # previous body + measured sizes as extra "react" messages.
    layout_prompt.append(
        MessagesPlaceholder(variable_name="react"),
    )

    HTML_TEMPLATE = """<!DOCTYPE html>
<html>
<head>
<title>Poster</title>
{style}
<script>
MathJax = {{ tex: {{ inlineMath: [["$", "$"]] }} }};
</script>
<script
id="MathJax-script"
async
src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"
></script>
</head>
<body>
{body}
</body>
</html>
"""

    def get_content_sizes(sizes: list[list[dict]]) -> float:
        """Calculate the total content size from the sizes data structure"""
        return sum(
            column["width"] * column["height"]
            for content in sizes
            for group in content
            for column in group
        )

    def get_total_size(sizes: list[list[dict]]) -> float:
        """Calculate the total size including spacing from the sizes data structure"""
        return sum(
            (
                sum(column["width"] for column in group)
                * max((column["height"] for column in group), default=0)
            )
            for content in sizes
            for group in content
        )

    def calculate_blank_proportion(poster_sizes, section_sizes) -> float:
        """Calculate the proportion of blank space in the poster"""
        poster_content_sizes = get_content_sizes(poster_sizes)
        section_content_sizes = get_content_sizes(section_sizes)
        poster_total_size = get_total_size(poster_sizes)
        section_total_size = get_total_size(section_sizes)

        if poster_total_size == 0:
            return 1.0

        # Section-internal blank space (section_total - section_content) is
        # subtracted from the poster content so it still counts as blank.
        return (
            1.0
            - (poster_content_sizes - (section_total_size - section_content_sizes))
            / poster_total_size
        )

    max_attempts = 5
    attempt = 1

    while True:
        # NOTE(review): re.search returns None when the LLM omits the
        # ```html fence — this then raises AttributeError, not ValueError.
        body = re.search(r"```html\n(.*?)\n```", output, re.DOTALL).group(1)

        html = HTML_TEMPLATE.format(style=style, body=body)
        html_with_figures = replace_figures_in_html(html, figures)

        # Measure the rendered layout at both granularities.
        poster_sizes = get_sizes("poster", html_with_figures)
        section_sizes = get_sizes("section", html_with_figures)

        proportion = calculate_blank_proportion(poster_sizes, section_sizes)
        if proportion <= 0.1:
            print(
                f"Attempted {attempt} times, remaining {proportion:.0%} blank spaces."
            )
            return {"html": html, "html_with_figures": html_with_figures}

        attempt += 1
        if attempt > max_attempts:
            raise ValueError(f"Invalid blank spaces: {proportion:.0%}")

        # Feed the failed body and its measured sizes back for another try.
        react = [
            # AIMessage(""),
            HumanMessage(
                content=f"""# Previous Body
{body}

# Previous Size of Columns in Poster
{poster_sizes}

# Previous Size of Columns in Section
{section_sizes}

Now there are {proportion:.0%} blank spaces. Please regenerate the content to create a more balanced poster layout.
"""
            ),
        ]

        output = layout_chain.invoke(
            {"style": style, "poster": poster, "react": react}
        ).content
434
+
435
+
436
def take_screenshot(output: str, html: str):
    """Render `html` at 1280px width and save a full-page PNG.

    The PNG path is derived from `output` by swapping the .json suffix.
    Fix over the original: the browser is closed in a ``finally`` block so a
    rendering failure no longer leaks the Chromium process.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page(viewport={"width": 1280, "height": 100})
            page.set_content(html)
            page.screenshot(
                type="png", path=output.replace(".json", ".png"), full_page=True
            )
        finally:
            browser.close()
445
+
446
+
447
def replace_figures_in_svg(svg: str, figures: list[str]) -> str:
    """Replace numeric ``href="idx"`` attributes with base64 PNG data URLs."""

    def _swap(match):
        idx = int(match.group(1))
        if not 0 <= idx < len(figures):
            return match.group(0)
        return f'href="data:image/png;base64,{figures[idx]}"'

    return re.sub(r"href=\"(\d+)\"", _swap, svg)
457
+
458
+
459
def svg_to_png(output: str, svg: str):
    """Rasterize the SVG poster to a 7000px-wide PNG.

    The PNG path is derived from `output` by swapping the .json suffix.
    """
    png_path = output.replace(".json", ".png")
    cairosvg.svg2png(
        bytestring=svg.encode("utf-8"),
        write_to=png_path,
        output_width=7000,
    )
465
+
466
+
467
def replace_figures_in_latex(latex: str, figures: list[str]) -> str:
    """Point numeric ``\\includegraphics{idx}`` commands at the exported
    ``figure_<idx>.png`` files, preserving any bracketed options."""

    def _swap(match):
        idx = int(match.group(2))
        if not 0 <= idx < len(figures):
            return match.group(0)
        options = match.group(1) or ""
        return f"\\includegraphics{options}{{figure_{idx}.png}}"

    return re.sub(r"\\includegraphics(\[.*?\])?\{(\d+)\}", _swap, latex)
478
+
479
+
480
def latex_to_png(output: str, latex: str):
    """Compile the sibling .tex of `output` with pdflatex and save page 1 as PNG.

    NOTE(review): the `latex` argument is unused here — the .tex file appears
    to be written to disk beforehand; confirm against the callers.
    """
    # pdflatex exit status is deliberately not checked: in nonstopmode it
    # often exits non-zero even when a usable PDF was produced. A genuine
    # failure surfaces below when convert_from_path cannot find the PDF.
    subprocess.run(
        [
            "pdflatex",
            "-interaction=nonstopmode",
            f"-output-directory={os.path.dirname(output)}",
            output.replace(".json", ".tex"),
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Only the first page is rasterized — the poster is expected to be one page.
    images = convert_from_path(output.replace(".json", ".pdf"), dpi=300)
    images[0].save(output.replace(".json", ".png"))
493
+
494
+
495
def generate_poster_v3(
    vendor: str,
    model: str,
    text_prompt: str,
    figures_prompt: str,
    pdf: str,
    figures: list[str],
    figures_index: list[str],
) -> dict:
    """Build a structured poster from a paper PDF via a multi-stage LLM pipeline.

    Stages: (1) ask the LLM for the section outline, (2) describe every
    extracted figure, (3) generate a text-only poster, (4) insert figure
    index placeholders into the text poster.

    Args:
        vendor: LLM vendor; only "openai" assigns `llm` (see note below).
        model: Model name; o1/o3/o4 models use ChatOpenAI.
        text_prompt: Unused — shadowed by a local ChatPromptTemplate below.
        figures_prompt: Unused — shadowed by a local ChatPromptTemplate below.
        pdf: Path to the paper PDF.
        figures: Base64-encoded PNG figures extracted from the paper.
        figures_index: Unused in this function body.

    Returns:
        Dict with "sections", "figures" (with descriptions),
        "text_based_poster" and "image_based_poster" (pydantic models).

    NOTE(review): `llm` is only assigned for vendor == "openai"; other
    vendors raise NameError — confirm intended vendor set.
    """
    # Setup LLM
    if vendor == "openai":
        if "o1" in model or "o3" in model or "o4" in model:
            llm = ChatOpenAI(
                model=model,
                temperature=1,
                max_tokens=8000,
            )
        else:
            llm = BaseChatOpenAI(
                model=model,
                temperature=1,
                max_tokens=8000,
                # model_kwargs={
                #     "extra_body": {"chat_template_kwargs": {"enable_thinking": False}}
                # },
            )
    # Flatten the whole PDF into one text blob for the prompts below.
    loader = PyMuPDFLoader(pdf)
    pages = loader.load()
    paper_content = "\n".join([page.page_content for page in pages])

    from .compress import compress_image

    # NOTE(review): figure_messages is built but never used afterwards —
    # looks like a leftover; confirm before removing.
    figure_messages = [
        HumanMessagePromptTemplate(
            prompt=[
                ImagePromptTemplate(
                    input_variables=["figure"],
                    template={"url": "data:image/png;base64,{figure}"},
                ),
            ],
        ).format(figure=compress_image(figure, quality=85, max_size=(64, 64)))
        for figure in figures
    ]

    # Doubled braces keep the example literal when run through the
    # ChatPromptTemplate formatting below.
    json_format_example = """
```json
{{
    "Introduction": "Brief overview of the paper's main topic and objectives.",
    "Methodology": "Description of the methods used in the research.",
    "Results": "Summary of the key findings and results."
}}
```
"""
    # Stage 1: ask the LLM for a flat {section name: description} outline,
    # retrying up to 5 times until valid output is produced.
    sections = None
    for _ in range(5):
        section_prompt = ChatPromptTemplate.from_messages(
            [
                SystemMessage(content="You are an expert in academic paper analysis."),
                HumanMessagePromptTemplate.from_template(
                    """Please analyze the paper content and identify the key sections that should be included in the poster.
For each section, provide a concise description of what should be included. First, determine the paper type:
- For methodology research papers: Focus on method description, experimental results, and research methodology.
- For benchmark papers: Highlight task definitions, dataset construction, and evaluation outcomes.
- For survey/review papers: Emphasize field significance, key developmental milestones, critical theories/techniques, current challenges, and emerging trends.

Note that the specific section names should be derived from the paper's content. Related sections can be combined to avoid fragmentation. Limit the total number of sections to maintain clarity. Do not include acknowledgements or references sections.

Return the result as a flat JSON object with section names as keys and descriptions as values, without nested structures. You MUST use Markdown code block syntax with the json language specifier.

Example format:
{json_format_example}

Paper content:
{paper_content}
"""
                ),
            ]
        )
        sections_response = llm.invoke(
            section_prompt.format(
                json_format_example=json_format_example, paper_content=paper_content
            )
        )

        json_pattern = r"```json(.*?)```"
        match = re.search(json_pattern, sections_response.content, re.DOTALL)
        if match:
            json_content = match.group(1)
        else:
            continue

        try:
            # SECURITY(review): eval() on LLM output is arbitrary code
            # execution; json.loads (or ast.literal_eval) would be safer.
            sections = eval(json_content.strip())
            if all(
                isinstance(k, str) and isinstance(v, str) for k, v in sections.items()
            ):
                break
        except Exception:
            continue

    if sections is None:
        raise ValueError("Failed to retrieve valid sections from LLM response.")

    # Pydantic model whose fields mirror the discovered sections.
    DynamicPoster = create_dynamic_poster_model(sections)

    # Stage 2: describe each extracted figure with a (multimodal) LLM.
    figures_description_prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessage(
                content="You are an academic image analysis expert. Provide concise descriptions (under 100 words) of academic figures, diagrams, charts, or images. Identify what the figure displays, its likely purpose in academic literature, and highlight key data points or trends. Focus on clarity and academic relevance while maintaining precision in your analysis."
            ),
            HumanMessagePromptTemplate(
                prompt=[
                    # PromptTemplate(template="Describe this image:"),
                    ImagePromptTemplate(
                        input_variables=["image_data"],
                        template={"url": "data:image/png;base64,{image_data}"},
                    ),
                ],
            ),
        ]
    )

    # use_claude toggles an alternative multimodal model + a disk cache for
    # the figure descriptions; currently hard-disabled.
    use_claude = False
    mllm = BaseChatOpenAI(
        temperature=1,
        max_tokens=8000,
    )

    figures_with_descriptions = ""
    figure_list = []

    figures_description_cache = pdf.replace(".pdf", "_figures_description.json")
    if use_claude and os.path.exists(figures_description_cache):
        with open(figures_description_cache, "r") as f:
            figures_with_descriptions = f.read()
    else:
        figure_chain = figures_description_prompt | (mllm if use_claude else llm)
        for i, figure in enumerate(tqdm(figures, desc=f"处理图片 {pdf}")):
            figure_description_response = figure_chain.invoke({"image_data": figure})
            figures_with_descriptions += f"""
<figure_{i}>
{figure_description_response.content}
</figure_{i}>
"""
            figure_list.append(
                {"figure": figure, "description": figure_description_response.content}
            )
        if use_claude:
            with open(figures_description_cache, "w") as f:
                f.write(figures_with_descriptions)

    # Stage 3: generate the text-only poster (rebinds the unused
    # `text_prompt` parameter with a local template).
    text_prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessage(
                content="You are a helpful academic expert, who is specialized in generating a text-based paper poster, from given contents."
            ),
            HumanMessagePromptTemplate.from_template(
                """Below is the figures with descriptions in the paper:
<figures>
{figures}
</figures>

Below is the content of the paper:
<paper_content>
{paper_content}
</paper_content>

If figures can effectively convey the poster content, simplify the related text to avoid redundancy. Include essential mathematical formulas where they enhance understanding.

{format_instructions}

Ensure all sections are precise, concise, and presented in markdown format without headings."""
            ),
        ]
    )
    parser = PydanticOutputParser(pydantic_object=DynamicPoster)
    # Fallback parser: asks the LLM to repair malformed structured output.
    fixing_parser = OutputFixingParser.from_llm(parser=parser, llm=llm)
    text_prompt = text_prompt.partial(
        format_instructions=parser.get_format_instructions()
    )
    text_chain = text_prompt | llm | remove_think_tags | parser
    try:
        text_poster = text_chain.invoke(
            {"paper_content": paper_content, "figures": figures_with_descriptions}
        )
    except OutputParserException as e:
        text_poster = fixing_parser.parse(e.llm_output)

    # Stage 4: insert figure index placeholders (rebinds the unused
    # `figures_prompt` parameter with a local template).
    figures_prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessagePromptTemplate.from_template(
                "You are a helpful academic expert, who is specialized in generating a paper poster, from given contents and figures. "
            ),
            HumanMessagePromptTemplate.from_template(
                """Below is the figures with descriptions in the paper:
<figures>
{figures}
</figures>

I have already generated a text-based poster as follows:
<poster_content>
{poster_content}
</poster_content>

The paper content is as follows:
<paper_content>
{paper_content}
</paper_content>

Insert figures into the poster content using figure index notation as `![figure_description](figure_index)`. For example, `![Overview](0)`.
The figure_index MUST be an integer starting from 0, and no other text should be used in the figure_index position.
Each figure should be used at most once, with precise and accurate placement.
Prioritize pictures and tables based on their relevance and importance to the content.

{format_instructions}"""
            ),
        ]
    )
    figures_prompt = figures_prompt.partial(
        figures=figures_with_descriptions,
        format_instructions=parser.get_format_instructions(),
    )
    figures_chain = figures_prompt | llm | remove_think_tags | parser
    try:
        figures_poster = figures_chain.invoke(
            {"poster_content": text_poster, "paper_content": paper_content}
        )
    except OutputParserException as e:
        figures_poster = fixing_parser.parse(e.llm_output)

    return {
        "sections": sections,
        "figures": figure_list,
        "text_based_poster": text_poster,
        "image_based_poster": figures_poster,
    }
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bert_score==0.3.13
2
+ fire==0.7.0
3
+ PyMuPDF  # provides the "fitz" module imported by poster/loader.py; the PyPI package literally named "fitz" is an unrelated placeholder
4
+ langchain==0.3.19
5
+ langchain_anthropic==0.3.8
6
+ langchain_community==0.3.18
7
+ langchain_core==0.3.40
8
+ langchain_openai==0.3.7
9
+ numpy==2.2.3
10
+ pdf2image==1.17.0
11
+ Pillow==11.1.0
12
+ pydantic==2.10.6
13
+ PyPDF2==3.0.1
14
+ Requests==2.32.3
15
+ retry==0.9.2
16
+ rouge==1.0.1
17
+ scikit-image  # "skimage==0.0" is an empty placeholder on PyPI; scikit-image is the real distribution
18
+ tqdm==4.67.1
19
+ gradio
20
+ cairosvg==2.7.1
21
+ playwright
start.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import glob
4
+ from main import generate_paper_poster
5
+ from tqdm import tqdm
6
+ import concurrent.futures
7
+
8
+
9
def process_papers(input_dir, output_dir, url, model):
    """Generate posters for every paper directory under `input_dir`.

    Each subdirectory is expected to contain a paper.pdf; outputs go to
    `output_dir/<paper_id>/poster.{json,png}`. Work is fanned out over a
    16-thread pool; already-generated posters are skipped.
    """
    os.makedirs(output_dir, exist_ok=True)

    pdf_files = [
        os.path.join(input_dir, entry, "paper.pdf")
        for entry in os.listdir(input_dir)
        if os.path.isdir(os.path.join(input_dir, entry))
    ]

    def process_single_pdf(pdf_file):
        """Generate one poster; failures are logged, never propagated."""
        try:
            file_id = os.path.basename(os.path.dirname(pdf_file))
            poster_dir = os.path.join(output_dir, file_id)
            os.makedirs(poster_dir, exist_ok=True)
            output_file = os.path.join(poster_dir, "poster.json")
            output_png = os.path.join(poster_dir, "poster.png")

            if os.path.exists(output_file) and os.path.exists(output_png):
                print(f"跳过已存在的文件: {output_file}")
                return

            generate_paper_poster(
                url=url,
                pdf=pdf_file,
                model=model,
                output=output_file,
                text_prompt=" ",
                figures_prompt=" ",
            )
            print(f"成功生成: {output_file}")

        except Exception as e:
            print(f"处理文件 {pdf_file} 时出错: {e}")

    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        futures = [
            executor.submit(process_single_pdf, pdf_file) for pdf_file in pdf_files
        ]

        # Drain completions through tqdm purely for progress display.
        for _ in tqdm(
            concurrent.futures.as_completed(futures),
            total=len(futures),
            desc=f"处理文件 {model}",
        ):
            pass
55
+
56
+
57
+ if __name__ == "__main__":
58
+ url = ""
59
+ input_dir = "eval/data"
60
+ models = []
61
+ for model in models:
62
+ output_dir = f"eval/temp-v2/{model.replace('/', '-')}"
63
+ process_papers(input_dir, output_dir, url, model)