suntao.0 committed on
Commit
04aed77
·
1 Parent(s): 074e02d
Files changed (13) hide show
  1. .gitignore +175 -0
  2. LICENSE +21 -0
  3. README.md +169 -5
  4. app.py +312 -0
  5. figure_detection.py +90 -0
  6. main.py +120 -0
  7. poster/__init__.py +0 -0
  8. poster/compress.py +36 -0
  9. poster/figures.py +73 -0
  10. poster/loader.py +27 -0
  11. poster/poster.py +730 -0
  12. requirements.txt +21 -0
  13. start.py +63 -0
.gitignore ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # UV
98
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ #uv.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
116
+ .pdm.toml
117
+ .pdm-python
118
+ .pdm-build/
119
+
120
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
121
+ __pypackages__/
122
+
123
+ # Celery stuff
124
+ celerybeat-schedule
125
+ celerybeat.pid
126
+
127
+ # SageMath parsed files
128
+ *.sage.py
129
+
130
+ # Environments
131
+ .env
132
+ .venv
133
+ env/
134
+ venv/
135
+ ENV/
136
+ env.bak/
137
+ venv.bak/
138
+
139
+ # Spyder project settings
140
+ .spyderproject
141
+ .spyproject
142
+
143
+ # Rope project settings
144
+ .ropeproject
145
+
146
+ # mkdocs documentation
147
+ /site
148
+
149
+ # mypy
150
+ .mypy_cache/
151
+ .dmypy.json
152
+ dmypy.json
153
+
154
+ # Pyre type checker
155
+ .pyre/
156
+
157
+ # pytype static type analyzer
158
+ .pytype/
159
+
160
+ # Cython debug symbols
161
+ cython_debug/
162
+
163
+ # PyCharm
164
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
165
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
166
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
167
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
168
+ #.idea/
169
+
170
+ # Ruff stuff:
171
+ .ruff_cache/
172
+
173
+ # PyPI configuration file
174
+ .pypirc
175
+ .DS_Store
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 multimodal-art-projection
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,12 +1,176 @@
1
  ---
2
- title: P2P
3
- emoji: 🏆
4
- colorFrom: pink
5
- colorTo: gray
6
  sdk: gradio
7
  sdk_version: 5.31.0
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: P2P Paper-to-Poster Generator
3
+ emoji: 🎓
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
  sdk_version: 5.31.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
+ # P2P: Automated Paper-to-Poster Generation and Fine-Grained Benchmark
14
+
15
+ [![](https://img.shields.io/badge/arXiv-2505.17104-b31b1b.svg?style=for-the-badge)](https://arxiv.org/abs/2505.17104)
16
+
17
+ [![Dataset - P2PInstruct](https://img.shields.io/badge/Dataset-P2PInstruct-blue)](https://huggingface.co/datasets/ASC8384/P2PInstruct)
18
+ [![Dataset - P2PEval](https://img.shields.io/badge/Dataset-P2PEval-blue)](https://huggingface.co/datasets/ASC8384/P2PEval)
19
+
20
+ ## 🚀 Try it on Hugging Face Spaces
21
+
22
+ This application is deployed on Hugging Face Spaces! You can try it directly in your browser without any installation:
23
+
24
+ **🎓 [Launch P2P Paper-to-Poster Generator](https://huggingface.co/spaces/ASC8384/P2P)**
25
+
26
+ ### Quick Start on Spaces:
27
+ 1. Upload your PDF research paper
28
+ 2. Enter your OpenAI API key and base URL (if using proxy)
29
+ 3. Input the AI model name (e.g., gpt-4o-mini, claude-3-sonnet)
30
+ 4. Configure the figure detection service URL
31
+ 5. Click "Generate Poster" and wait for processing
32
+ 6. Preview the generated poster and download JSON/HTML files
33
+
34
+ ⚠️ **Requirements**:
35
+ - Valid OpenAI API key with sufficient balance
36
+ - Figure detection service URL for extracting images from PDFs
37
+ - Compatible AI model (OpenAI, Claude, Gemini, etc.)
38
+
39
+ 💡 **Features**:
40
+ - Real-time HTML poster preview
41
+ - Direct JSON structure display
42
+ - Support for multiple AI models
43
+ - Flexible API configuration
44
+
45
+ ## Overview
46
+
47
+ P2P is an AI-powered tool that automatically converts academic research papers into professional conference posters. This repository contains the code for generating and evaluating these posters, leveraging large language models to extract key information and create visually appealing presentations.
48
+
49
+ The full research paper is available on [arXiv](https://arxiv.org/abs/2505.17104).
50
+
51
+ **Note:** Due to the large size of the evaluation and training datasets, only simple samples are included in this repository. The complete datasets are available on HuggingFace:
52
+ - [P2PInstruct](https://huggingface.co/datasets/ASC8384/P2PInstruct) - Training dataset
53
+ - [P2PEval](https://huggingface.co/datasets/ASC8384/P2PEval) - Benchmark dataset
54
+
55
+ ## Repository Structure
56
+
57
+ ### Core Files
58
+ - `main.py`: Main entry point for generating a poster from a single paper
59
+ - `start.py`: Batch processing script for generating posters from multiple papers
60
+ - `end.py`: Evaluation coordinator that processes generated posters
61
+ - `evalv2.py`: Core evaluation logic with metrics and comparison methods
62
+ - `figure_detection.py`: Utility for detecting and extracting figures from PDFs
63
+
64
+ ### Directories
65
+ - `poster/`: Core poster generation logic
66
+ - `poster.py`: Main poster generation implementation
67
+ - `figures.py`: Figure extraction and processing utilities
68
+ - `compress.py`: Image compression utilities
69
+ - `loader.py`: PDF loading utilities
70
+
71
+ - `eval/`: Evaluation tools and resources
72
+ - `eval_checklist.py`: Checklist-based evaluation implementation
73
+ - `predict_with_xgboost.py`: ML-based poster quality prediction
74
+ - `common.yaml`: Common evaluation parameters
75
+ - `xgboost_model.joblib`: Pre-trained evaluation model
76
+
77
+ ## Requirements
78
+
79
+ - Python 3.10+
80
+ - Dependencies listed in `requirements.txt`
81
+
82
+ ## Setup
83
+
84
+ Install dependencies:
85
+ ```bash
86
+ pip install -r requirements.txt
87
+ playwright install
88
+ ```
89
+
90
+ ## Usage
91
+
92
+ ### Generating a Single Poster
93
+
94
+ To generate a poster from a single paper:
95
+
96
+ ```bash
97
+ # Deploy figure_detection first
98
+ python main.py --url="URL_TO_PDF" --pdf="path/to/paper.pdf" --model="gpt-4o-mini" --output="output/poster.json"
99
+ ```
100
+
101
+ #### Parameters:
102
+ - `--url`: URL for PDF processing service (detecting and extracting figures)
103
+ - `--pdf`: Path to the local PDF file
104
+ - `--model`: LLM model to use (default: gpt-4o-mini)
105
+ - `--output`: Output file path (default: poster.json)
106
+
107
+ #### Output Files:
108
+ - `poster.json`: JSON representation of the poster
109
+ - `poster.html`: HTML version of the poster
110
+ - `poster.png`: PNG image of the poster
111
+
112
+ ### Batch Generating Posters
113
+
114
+ To generate posters for multiple papers:
115
+
116
+ 1. Organize your papers in a directory structure:
117
+ ```
118
+ eval/data/
119
+ └─ paper_id_1/
120
+ └─ paper.pdf
121
+ └─ paper_id_2/
122
+ └─ paper.pdf
123
+ ...
124
+ ```
125
+
126
+ 2. Edit `start.py` to configure:
127
+ - `url`: URL for PDF processing service
128
+ - `input_dir`: Directory containing papers (default: "eval/data")
129
+ - `models`: List of AI models to use for generation
130
+
131
+ 3. Run the batch generation script:
132
+ ```bash
133
+ python start.py
134
+ ```
135
+
136
+ Generated posters will be saved to:
137
+ ```
138
+ eval/temp-v2/{model_name}/{paper_id}/
139
+ └─ poster.json
140
+ └─ poster.html
141
+ └─ poster.png
142
+ ```
143
+
144
+ ### Evaluating Posters
145
+
146
+ To evaluate generated posters:
147
+
148
+ 1. Ensure reference materials exist:
149
+ ```
150
+ eval/data/{paper_id}/
151
+ └─ poster.png (reference poster)
152
+ └─ checklist.yaml (evaluation checklist)
153
+ ```
154
+
155
+ 2. Run the evaluation script:
156
+ ```bash
157
+ python end.py
158
+ ```
159
+
160
+ Evaluation results will be saved to `eval/temp-v2/results.jsonl`.
161
+
162
+ ## Citation
163
+
164
+ If you find our work useful, please consider citing P2P:
165
+
166
+ ```bibtex
167
+ @misc{sun2025p2pautomatedpapertopostergeneration,
168
+ title={P2P: Automated Paper-to-Poster Generation and Fine-Grained Benchmark},
169
+ author={Tao Sun and Enhao Pan and Zhengkai Yang and Kaixin Sui and Jiajun Shi and Xianfu Cheng and Tongliang Li and Wenhao Huang and Ge Zhang and Jian Yang and Zhoujun Li},
170
+ year={2025},
171
+ eprint={2505.17104},
172
+ archivePrefix={arXiv},
173
+ primaryClass={cs.CL},
174
+ url={https://arxiv.org/abs/2505.17104},
175
+ }
176
+ ```
app.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tempfile
3
+ import os
4
+ import json
5
+ import shutil
6
+ from pathlib import Path
7
+ import base64
8
+
9
+ from main import generate_paper_poster
10
+
11
+
12
def process_paper_to_poster(
    pdf_file,
    model_choice,
    figure_service_url,
    openai_api_key,
    openai_base_url
):
    """Generate a poster from an uploaded PDF, yielding live status updates.

    Args:
        pdf_file: Gradio file object for the uploaded paper PDF.
        model_choice: Name of the LLM to use (e.g. "gpt-4o-mini").
        figure_service_url: URL of the figure-detection web service.
        openai_api_key: OpenAI-compatible API key.
        openai_base_url: Optional base URL for proxies / compatible services.

    Yields:
        (download_files, json_preview, html_preview, status_message) tuples
        matching the Gradio outputs; intermediate yields carry only a status.
    """
    # Validate inputs early, with a user-facing message for each failure.
    if pdf_file is None:
        yield None, None, None, "❌ Please upload a PDF file first!"
        return

    if not openai_api_key.strip():
        yield None, None, None, "❌ Please enter your OpenAI API Key!"
        return

    if not figure_service_url.strip():
        yield None, None, None, "❌ Please enter the figure detection service URL!"
        return

    temp_dir = None
    try:
        yield None, None, None, "🚀 Starting poster generation process..."

        # Configure the OpenAI client via environment variables.
        # NOTE(review): env vars are process-wide, so concurrent Gradio
        # sessions share credentials — consider passing them explicitly.
        yield None, None, None, "⚙️ Configuring OpenAI API settings..."
        os.environ['OPENAI_API_KEY'] = openai_api_key.strip()
        if openai_base_url.strip():
            os.environ['OPENAI_BASE_URL'] = openai_base_url.strip()

        # Create a scratch workspace for this request.
        yield None, None, None, "📁 Creating temporary workspace..."
        temp_dir = tempfile.mkdtemp()

        # Copy the upload into the workspace under a stable name.
        yield None, None, None, "📄 Processing uploaded PDF file..."
        pdf_path = os.path.join(temp_dir, "paper.pdf")
        shutil.copy(pdf_file.name, pdf_path)

        yield None, None, None, "🔍 Extracting content from PDF and detecting figures..."

        # Delegate the heavy lifting to the CLI entry point in main.py.
        poster, html = generate_paper_poster(
            url=figure_service_url,
            pdf=pdf_path,
            vendor="openai",
            model=model_choice,
            text_prompt="",     # use the default prompt
            figures_prompt="",  # use the default prompt
            output=""           # unused by the web flow
        )

        yield None, None, None, "🖼️ Image processing completed! Generating JSON structure..."

        # Serialize the poster model for preview and download.
        json_content = json.dumps(poster.model_dump(), indent=2, ensure_ascii=False)

        yield None, None, None, "📋 JSON file generated successfully! Creating HTML poster..."

        # Persist results to named temp files so Gradio can offer downloads.
        json_file = tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False, encoding='utf-8')
        html_file = tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False, encoding='utf-8')

        json_file.write(json_content)
        json_file.close()

        html_file.write(html)
        html_file.close()

        yield (
            [json_file.name, html_file.name],
            json_content,
            html,
            "✅ Poster generated successfully! 🎉\n📥 Files are ready for download\n🎨 HTML preview is displayed below\n💡 Download the HTML file for best viewing experience"
        )

    except Exception as e:
        error_msg = f"❌ Error occurred during processing: {str(e)}"
        yield None, None, None, error_msg
    finally:
        # BUG FIX: the workspace was previously removed only on success,
        # leaking one temp directory per failed request.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
102
+
103
+
104
+ # 创建Gradio界面
105
+ def create_interface():
106
+
107
+ # JavaScript代码强制启用Light模式
108
+ js_func = """
109
+ function refresh() {
110
+ const url = new URL(window.location);
111
+
112
+ if (url.searchParams.get('__theme') !== 'light') {
113
+ url.searchParams.set('__theme', 'light');
114
+ window.location.href = url.href;
115
+ }
116
+ }
117
+ """
118
+
119
+ with gr.Blocks(
120
+ title="P2P: Paper-to-Poster Generator",
121
+ theme=gr.themes.Default(), # 使用Light主题
122
+ js=js_func, # 添加JavaScript强制Light模式
123
+ css="""
124
+ .gradio-container {
125
+ max-width: 1600px !important;
126
+ }
127
+ .title {
128
+ text-align: center;
129
+ margin-bottom: 1rem;
130
+ }
131
+ .preview-container {
132
+ min-height: 1000px;
133
+ max-height: 1500px;
134
+ overflow-y: auto;
135
+ border: 1px solid #e0e0e0;
136
+ border-radius: 8px;
137
+ padding: 15px;
138
+ background-color: #fafafa;
139
+ }
140
+ .preview-container iframe {
141
+ width: 100% !important;
142
+ min-height: 1000px !important;
143
+ }
144
+ .config-section {
145
+ margin-bottom: 2rem;
146
+ }
147
+ .status-updating {
148
+ color: #2563eb;
149
+ font-weight: 500;
150
+ }
151
+ """
152
+ ) as demo:
153
+
154
+ gr.HTML("""
155
+ <div class="title">
156
+ <h1>🎓 P2P: Paper-to-Poster Generator</h1>
157
+ <p>Automatically convert academic papers into professional conference posters ✨</p>
158
+ <p><a href="https://arxiv.org/abs/2505.17104" target="_blank">📄 View Research Paper</a></p>
159
+ </div>
160
+ """)
161
+
162
+ # 配置区域 - 水平布局
163
+ with gr.Row(elem_classes=["config-section"]):
164
+ with gr.Column(scale=1):
165
+ gr.Markdown("### 📥 Input Configuration")
166
+
167
+ # 文件上传
168
+ pdf_input = gr.File(
169
+ label="Upload PDF Paper File",
170
+ file_types=[".pdf"],
171
+ file_count="single"
172
+ )
173
+
174
+ # OpenAI API配置
175
+ gr.Markdown("#### 🔑 OpenAI API Configuration")
176
+ openai_api_key = gr.Textbox(
177
+ label="OpenAI API Key",
178
+ placeholder="sk-...",
179
+ type="password",
180
+ info="Enter your OpenAI API key"
181
+ )
182
+
183
+ openai_base_url = gr.Textbox(
184
+ label="OpenAI Base URL (Optional)",
185
+ placeholder="https://api.openai.com/v1",
186
+ value="https://api.openai.com/v1",
187
+ info="Modify this URL if using proxy or other OpenAI-compatible services"
188
+ )
189
+
190
+ with gr.Column(scale=1):
191
+ gr.Markdown("### ⚙️ Model Configuration")
192
+
193
+ # 模型选择
194
+ model_choice = gr.Textbox(
195
+ label="AI Model Name",
196
+ value="gpt-4o-mini",
197
+ placeholder="e.g., gpt-4o-mini, gpt-4o, gpt-3.5-turbo, claude-3-sonnet",
198
+ info="Enter the AI model name you want to use"
199
+ )
200
+
201
+ # 图片检测服务URL
202
+ figure_url = gr.Textbox(
203
+ label="Figure Detection Service URL",
204
+ placeholder="Enter the URL of figure detection service",
205
+ info="Used to extract images and tables from PDF"
206
+ )
207
+
208
+ # 生成按钮
209
+ generate_btn = gr.Button(
210
+ "🚀 Generate Poster",
211
+ variant="primary",
212
+ size="lg"
213
+ )
214
+
215
+ with gr.Column(scale=1):
216
+ gr.Markdown("### 📤 Results & Downloads")
217
+
218
+ # 状态消息
219
+ status_msg = gr.Textbox(
220
+ label="Status Information",
221
+ interactive=False,
222
+ lines=4,
223
+ show_copy_button=True
224
+ )
225
+
226
+ # 文件下载
227
+ output_files = gr.File(
228
+ label="📥 Download Generated Files (JSON & HTML)",
229
+ file_count="multiple",
230
+ interactive=False,
231
+ show_label=True
232
+ )
233
+
234
+ # JSON预览 - 压缩到侧边栏
235
+ with gr.Accordion("📋 JSON Structure", open=False):
236
+ json_preview = gr.Code(
237
+ label="",
238
+ language="json",
239
+ lines=10,
240
+ show_label=False
241
+ )
242
+
243
+ # 预览区域 - 跨栏全宽显示
244
+ gr.Markdown("### 🎨 HTML Poster Preview")
245
+
246
+ gr.Markdown("**💡 Recommended: Download the HTML file from above and open it in your browser for optimal viewing experience**")
247
+
248
+ # HTML预览 - 全宽显示
249
+ html_preview = gr.HTML(
250
+ label="",
251
+ show_label=False,
252
+ elem_classes=["preview-container"]
253
+ )
254
+
255
+ # 使用说明
256
+ gr.Markdown("""
257
+ ### 📖 Usage Instructions
258
+
259
+ 1. **Upload PDF File**: Select the academic paper PDF you want to convert
260
+ 2. **Configure OpenAI API**: Enter your API Key and Base URL (if needed)
261
+ 3. **Select Model**: Enter model name manually, such as gpt-4o-mini, gpt-4o, claude-3-sonnet, etc.
262
+ 4. **Set Figure Service**: Enter the URL of the figure detection service
263
+ 5. **Generate Poster**: Click the generate button and wait for processing
264
+ 6. **Download Results**: Download the generated JSON and HTML files from the download section
265
+ 7. **Full Preview**: Download the HTML file and open it in your browser for the best viewing experience
266
+
267
+ ⚠️ **Important Notes**:
268
+ - Generated Poster is recommended to be viewed in fullscreen mode or download the HTML file to view in browser
269
+ - Requires a valid OpenAI API key with sufficient balance
270
+ - Figure detection service URL is required for extracting images from PDFs
271
+ - Processing time depends on paper length and complexity (usually 1-3 minutes)
272
+ - Ensure the model name is correct and supported by your API
273
+ - Download the HTML file and open it in your browser for the best viewing experience
274
+
275
+ 💡 **Tips**:
276
+ - Recommended to use gpt-4o-mini model for cost-effective testing
277
+ - Recommended to use Claude model for better performance
278
+ - Modify Base URL if using domestic proxy services
279
+ - Supports any OpenAI-compatible model names
280
+ - Can use Claude, Gemini and other models (requires corresponding API configuration)
281
+ - The HTML preview below shows how your poster will look with maximum width for better viewing
282
+ - Download the HTML file from the "Download Generated Files" section for standalone viewing
283
+ """)
284
+
285
+ # 绑定事件
286
+ generate_btn.click(
287
+ fn=process_paper_to_poster,
288
+ inputs=[
289
+ pdf_input,
290
+ model_choice,
291
+ figure_url,
292
+ openai_api_key,
293
+ openai_base_url
294
+ ],
295
+ outputs=[
296
+ output_files,
297
+ json_preview,
298
+ html_preview,
299
+ status_msg
300
+ ]
301
+ )
302
+
303
+ return demo
304
+
305
+
306
# Script entry point: build the UI and serve it.
if __name__ == "__main__":
    demo = create_interface()
    # Bind to all interfaces on 7860, the standard Hugging Face Spaces port.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False
    )
figure_detection.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import modal
from io import BytesIO
from pathlib import Path
from fastapi import File, UploadFile, Form

# Container image: Debian slim + graphics system libraries + the
# DocLayout-YOLO / ultralytics stack needed by the detection model.
image = (
    modal.Image.debian_slim(python_version="3.10")
    .apt_install(["libgl1-mesa-glx", "libglib2.0-0"])  # install system libraries for graphics handling
    .pip_install(
        "ultralytics>=8.2.85",
        "doclayout-yolo==0.0.2",
        "huggingface-hub",
        "fastapi",
    )
)

# Persistent volume holding the downloaded layout-detection weights.
volume = modal.Volume.from_name("yolo-layout-detection", create_if_missing=True)
volume_path = Path("/root") / "data"
# Fine-tuned DocLayout-YOLO checkpoint inside the volume.
model_path = volume_path / "path2doclayout_yolo_ft.pt"

app = modal.App(
    "yolo-layout-detection-temp",
    image=image,
    volumes={volume_path: volume},
)
23
@app.function()
def download_model():
    """Download the pdf-extract-kit model weights into the shared volume."""
    from huggingface_hub import snapshot_download

    # Only files matching 'path2*' are fetched — the fine-tuned YOLO
    # weights referenced by model_path.
    snapshot_download(
        repo_id="opendatalab/pdf-extract-kit-1.0",
        local_dir=volume_path,
        allow_patterns='path2*',
        max_workers=20,
    )
32
@app.cls(gpu="a10g")
class LayoutDetection:
    """Modal web service wrapping a DocLayout-YOLO layout-detection model.

    Exposes one POST endpoint that takes a page image plus a task name and
    returns bounding boxes for figures/tables, optionally merged with their
    nearest captions into a single union box.
    """

    @modal.enter()
    def load_model(self):
        # Load the fine-tuned YOLOv10 weights from the shared volume once
        # per container start.
        from doclayout_yolo import YOLOv10
        self.model = YOLOv10(model_path)

    @modal.web_endpoint(method="POST", docs=True)
    async def predict(self, img: UploadFile = File(...), task: str = Form(...)):
        """Detect layout regions in the uploaded image for the given task.

        task is one of "figure", "table", "figurecaption", "tablecaption";
        the caption variants pair each detection with its nearest caption.
        Returns a list of {"box": [x1, y1, x2, y2], "score": float} dicts.
        """
        from PIL import Image
        img_bytes = await img.read()
        img = Image.open(BytesIO(img_bytes))
        results = self.model.predict(img)
        # Parse detections into target/caption candidates per result.
        figs = []
        for result in results:
            boxes = result.__dict__['boxes'].xyxy.cpu().tolist()
            classes = result.__dict__['boxes'].cls.cpu().tolist()
            scores = result.__dict__['boxes'].conf.cpu().tolist()
            targets, captions = [], []
            # NOTE(review): class ids inferred from usage — 3 appears to be
            # figure, 4 figure caption, 5 table, 6/7 table caption variants;
            # confirm against the model's label map.
            for box, cls, score in zip(boxes, classes, scores):
                if task == "figure":
                    if cls == 3:
                        targets.append({"box": box, "score": score})
                elif task == "table":
                    if cls == 5:
                        targets.append({"box": box, "score": score})
                elif task == "figurecaption":
                    if cls == 3:
                        targets.append({"box": box, "score": score})
                    elif cls == 4:
                        captions.append({"box": box, "score": score})
                elif task == "tablecaption":
                    if cls == 5:
                        targets.append({"box": box, "score": score})
                    elif cls == 6 or cls == 7:
                        captions.append({"box": box, "score": score})
            if not captions:
                # No captions detected (or a caption-less task): return the
                # raw target detections as-is.
                figs = targets
            else:
                # Greedily pair each target with its nearest caption, using
                # the L1 distance between the target's bottom-left corner
                # and the caption's top-left corner.
                matches = []
                for target in targets:
                    min_distance = float('inf')
                    for caption in captions:
                        target_box, caption_box = target["box"], caption["box"]
                        distance = abs(target_box[0] - caption_box[0]) + abs(target_box[3] - caption_box[1])
                        if distance < min_distance:
                            min_distance = distance
                            correct_match = (target, caption)
                    matches.append(correct_match)
                for target, caption in matches:
                    target_box, caption_box = target["box"], caption["box"]
                    # Union box covering both the target and its caption.
                    union_box = [
                        min(target_box[0], caption_box[0]),
                        min(target_box[1], caption_box[1]),
                        max(target_box[2], caption_box[2]),
                        max(target_box[3], caption_box[3]),
                    ]
                    # Matched pairs get a fixed score of 1.0.
                    figs.append({"box": union_box, "score": 1.0})
        return figs
main.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import copy
3
+ import json
4
+ import fire
5
+ import os
6
+ import pathlib
7
+
8
+ from poster.figures import extract_figures
9
+ from poster.poster import (
10
+ generate_html_v2,
11
+ generate_poster_v3,
12
+ replace_figures_in_poster,
13
+ replace_figures_size_in_poster,
14
+ take_screenshot,
15
+ )
16
+
17
+
18
def generate_paper_poster(
    url: str,
    pdf: str,
    vendor: str = "openai",
    model: str = "gpt-4o-mini",
    text_prompt: str = "",
    figures_prompt: str = "",
    output: str = "poster.json",
):
    """Generate a paper poster from a PDF.

    Args:
        url: URL of the figure-detection service.
        pdf: Local path of the PDF file.
        vendor: LLM vendor identifier, default "openai".
        model: Name of the model to use, default is gpt-4o-mini.
        text_prompt: Text prompt template ("" selects the default).
        figures_prompt: Figures prompt template ("" selects the default).
        output: Output file path, default is poster.json (currently unused;
            kept for backward compatibility with existing callers).

    Returns:
        (poster, html): the structured poster object and its rendered HTML.
    """
    # BUG FIX: str.replace(".pdf", "") stripped the substring anywhere in
    # the path (e.g. "a.pdf.dir/b.pdf"); removesuffix only strips a
    # trailing extension.
    pdf_stem = pdf.removesuffix(".pdf")
    figures_cache = f"{pdf_stem}_figures.json"
    figures_cap_cache = f"{pdf_stem}_figures_cap.json"

    figures = []
    figures_cap = []
    print("开始提取图片...")
    if os.path.exists(figures_cache) and os.path.exists(figures_cap_cache):
        # Reuse previously extracted figures/captions for this PDF.
        print(f"使用缓存的图片: {figures_cache}")
        with open(figures_cache, "r", encoding="utf-8") as f:
            figures = json.load(f)
        with open(figures_cap_cache, "r", encoding="utf-8") as f:
            figures_cap = json.load(f)
    else:
        figures_img = extract_figures(url, pdf, task="figure")
        figures_table = extract_figures(url, pdf, task="table")
        img_caption = extract_figures(url, pdf, task="figurecaption")
        table_caption = extract_figures(url, pdf, task="tablecaption")

        # Lower the confidence threshold until the number of kept figures
        # matches the number of kept captions.
        threshold = 0.85
        while True:
            figures = [
                image
                for image, score in figures_img + figures_table
                if score >= threshold
            ]
            figures_cap = [
                image
                for image, score in img_caption + table_caption
                if score >= threshold
            ]
            print(f"{threshold:.2f} 提取到 {len(figures)} / {len(figures_cap)} 张图像")
            if len(figures) == len(figures_cap):
                break
            # BUG FIX: when the counts can never match, the loop previously
            # decremented the threshold forever. Once threshold <= 0 every
            # detection is already included, so give up and proceed with
            # the mismatched counts.
            if threshold <= 0:
                break
            threshold -= 0.05

        # Cache results next to the PDF; ensure_ascii=False output needs an
        # explicit utf-8 encoding to be portable across platforms.
        with open(figures_cache, "w", encoding="utf-8") as f:
            json.dump(figures, f, ensure_ascii=False)
        with open(figures_cap_cache, "w", encoding="utf-8") as f:
            json.dump(figures_cap, f, ensure_ascii=False)

    # Retry generation until it succeeds; errors that retrying cannot fix
    # (policy violations, context-length limits, ...) are re-raised.
    while True:
        try:
            result = generate_poster_v3(
                vendor, model, text_prompt, figures_prompt, pdf, figures_cap, figures
            )

            poster = result["image_based_poster"]
            # Keep an untouched copy: replace_figures_in_poster mutates the
            # structure, and the sizing pass needs the original.
            backup_poster = copy.deepcopy(poster)

            poster = replace_figures_in_poster(poster, figures)

            poster_size = replace_figures_size_in_poster(backup_poster, figures)
            print("Now generating HTML...")
            result = generate_html_v2(vendor, model, poster_size, figures)

            html = result["html_with_figures"]

            return poster, html

        except Exception as e:
            # Fatal error classes are detected by message substring and
            # propagated; everything else is logged and retried.
            if (
                "content management policy" in str(e)
                or "message larger than max" in str(e)
                or "exceeds the maximum length" in str(e)
                or "maximum context length" in str(e)
                or "Input is too long" in str(e)
                or "image exceeds 5 MB" in str(e)
                or "too many total text bytes" in str(e)
                or "Range of input length" in str(e)
                or "Invalid text" in str(e)
            ):
                raise
            print(f"处理文件 {pdf} 时出错: {e}")
117
+
118
+
119
# CLI entry point: expose generate_paper_poster's arguments as flags.
if __name__ == "__main__":
    fire.Fire(generate_paper_poster)
poster/__init__.py ADDED
File without changes
poster/compress.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import io
3
+ from PIL import Image
4
+
5
+
6
def compress_image(base64_str, quality=85, max_size=(1024, 1024)):
    """Shrink a base64-encoded image, returning a base64-encoded PNG.

    The image is downscaled (preserving aspect ratio) whenever it exceeds
    ``max_size``, then re-encoded as an optimized PNG.

    Args:
        base64_str: Base64-encoded source image.
        quality: Kept for backward compatibility; PNG is lossless, so
            Pillow ignores a ``quality`` value for PNG output and this
            parameter has no effect.
        max_size: (width, height) upper bound for the output image.

    Returns:
        Base64-encoded compressed image, or the original string unchanged
        if anything goes wrong (deliberate best-effort behaviour).
    """
    try:
        img_data = base64.b64decode(base64_str)
        img = Image.open(io.BytesIO(img_data))

        # Downscale only when needed; thumbnail() keeps the aspect ratio.
        if img.width > max_size[0] or img.height > max_size[1]:
            img.thumbnail(max_size, Image.LANCZOS)

        output = io.BytesIO()
        # BUG FIX: the former `quality=quality` argument was silently
        # ignored by the PNG encoder and only misled readers;
        # optimize=True already selects maximum lossless compression.
        img.save(output, format="PNG", optimize=True)

        compressed_base64 = base64.b64encode(output.getvalue()).decode("utf-8")
        return compressed_base64
    except Exception as e:
        # Best-effort: never fail the pipeline over one bad image.
        print(f"图片压缩失败: {e}")
        return base64_str  # fall back to the original image on failure
poster/figures.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import requests
3
+ import os
4
+ from pathlib import Path
5
+
6
+ from io import BytesIO
7
+ from PIL import Image
8
+ from retry import retry
9
+
10
+ from .loader import ImagePDFLoader
11
+
12
+
13
@retry(tries=3)
def _extract_figures(
    url: str, img: Image.Image, task: str = "figure"
) -> list[tuple[Image.Image, float]]:
    """Send one page image to the detection endpoint and crop out detections.

    Returns (cropped PIL image, detection score) pairs. Network or HTTP
    failures raise and are retried up to 3 times by the decorator.
    """
    with BytesIO() as buffer:
        img.save(buffer, format="PNG")
        png_bytes = buffer.getvalue()

        response = requests.post(
            url,
            data={"task": task},
            files=[("img", ("image.png", png_bytes, "image/png"))],
        )
        response.raise_for_status()

        detections = response.json()

    return [(img.crop(item["box"]), item["score"]) for item in detections]
31
+
32
+
33
def extract_figures(
    url: str, pdf: str, task: str = "figure"
) -> list[tuple[str, float]]:
    """Render each PDF page, detect figures remotely, and return them encoded.

    Returns (base64-encoded PNG, detection score) pairs across all pages.
    """
    pages = ImagePDFLoader(pdf).load()

    detections = []
    for page_image in pages:
        detections.extend(_extract_figures(url, page_image, task))

    encoded = []
    for cropped, score in detections:
        with BytesIO() as buffer:
            cropped.save(buffer, format="PNG")
            encoded.append(
                (base64.b64encode(buffer.getvalue()).decode("utf-8"), score)
            )

    return encoded
52
+
53
+
54
+ if __name__ == "__main__":
55
+ url = ""
56
+ pdf = "1.pdf"
57
+
58
+ output_dir = Path("output")
59
+ output_dir.mkdir(exist_ok=True)
60
+
61
+ base64_figures = extract_figures(url, pdf, task="figurecaption")
62
+
63
+ print(f"提取到 {len(base64_figures)} 张图像")
64
+
65
+ for i, (b64_str, score) in enumerate(base64_figures):
66
+ img_data = base64.b64decode(b64_str)
67
+ img = Image.open(BytesIO(img_data))
68
+
69
+ output_path = output_dir / f"figure_{i + 1}.png"
70
+ img.save(output_path)
71
+ print(f"图像已保存到: {output_path}")
72
+
73
+ print(f"所有图像已保存到 {output_dir} 目录")
poster/loader.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import fitz
2
+
3
+ from PIL import Image
4
+ from langchain_community.document_loaders import PyMuPDFLoader
5
+
6
+
7
class ImagePDFLoader(PyMuPDFLoader):
    """PDF loader that renders each page to a PIL image instead of text."""

    def load_pdf_page(self, page: fitz.Page, dpi: int) -> Image.Image:
        """Render one page at `dpi`; fall back to native scale if it is huge.

        The fallback re-render caps memory use for oversized pages
        (> 3000 px in either dimension at the requested DPI).
        """
        pix = page.get_pixmap(matrix=fitz.Matrix(dpi / 72, dpi / 72))
        image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

        if pix.width > 3000 or pix.height > 3000:
            # Re-render at 72 DPI (identity matrix) to keep the bitmap small.
            pix = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
            image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

        return image

    def load(self) -> list[Image.Image]:
        """Render every page of the PDF to an image at 250 DPI."""
        images = []

        # Close the document afterwards — the original leaked the file handle.
        doc = fitz.open(self.file_path)
        try:
            for i in range(len(doc)):
                page = doc[i]
                image = self.load_pdf_page(page, dpi=250)
                images.append(image)
        finally:
            doc.close()

        return images
poster/poster.py ADDED
@@ -0,0 +1,730 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import io
3
+ import json
4
+ import os
5
+ import re
6
+ import subprocess
7
+ import time
8
+ import cairosvg
9
+
10
+ from PIL import Image
11
+ from pdf2image import convert_from_path
12
+ from playwright.sync_api import sync_playwright
13
+ from pydantic import BaseModel, Field, create_model
14
+ from tqdm import tqdm
15
+
16
+ from langchain import hub
17
+
18
+ # from langchain_google_genai import ChatGoogleGenerativeAI
19
+ from langchain_openai import ChatOpenAI, AzureChatOpenAI
20
+ from langchain_openai.chat_models.base import BaseChatOpenAI
21
+
22
+ from langchain_community.document_loaders import PyMuPDFLoader
23
+ from langchain_core.prompts import (
24
+ HumanMessagePromptTemplate,
25
+ SystemMessagePromptTemplate,
26
+ ChatPromptTemplate,
27
+ MessagesPlaceholder,
28
+ PromptTemplate,
29
+ )
30
+ from langchain_core.prompts.image import ImagePromptTemplate
31
+ from langchain_core.output_parsers import PydanticOutputParser
32
+ from langchain_core.exceptions import OutputParserException
33
+ from langchain.output_parsers import OutputFixingParser
34
+ from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
35
+
36
+
37
def create_dynamic_poster_model(sections: dict[str, str]) -> type[BaseModel]:
    """Build a pydantic model with fixed header fields plus one string field
    per section returned by the LLM (field name = section title)."""
    base_fields = {
        "title": (str, Field(default="", description="Title of the paper")),
        "authors": (str, Field(default="", description="Authors of the paper")),
        "affiliation": (
            str,
            Field(default="", description="Affiliation of the authors"),
        ),
    }

    section_fields = {
        name: (str, Field(default="", description=description))
        for name, description in sections.items()
    }

    return create_model("DynamicPoster", **base_fields, **section_fields)
52
+
53
+
54
def remove_think_tags(llm_output):
    """Strip <think>...</think> reasoning blocks from LLM output.

    Handles both message objects (anything with a ``.content`` attribute,
    returned re-wrapped as an AIMessage) and plain strings. An unterminated
    trailing ``<think>`` block is also removed. Other types pass through.
    """

    def _strip(text):
        cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
        return re.sub(r"<think>.*", "", cleaned, flags=re.DOTALL)

    if hasattr(llm_output, "content"):
        return AIMessage(content=_strip(llm_output.content))
    if isinstance(llm_output, str):
        return _strip(llm_output)
    return llm_output
65
+
66
+
67
def replace_figures_in_markdown(
    markdown: str,
    figures: list[str],
) -> str:
    """Substitute numeric Markdown image placeholders ``![alt](idx)`` with the
    corresponding entry from ``figures``; out-of-range indices are left alone."""

    def _swap(match):
        idx = int(match.group(2))
        if not 0 <= idx < len(figures):
            return match.group(0)
        return f"![{match.group(1)}]({figures[idx]})"

    return re.sub(r"!\[(.*?)\]\((\d+)\)", _swap, markdown)
80
+
81
+
82
def replace_figures_in_poster(
    poster: BaseModel,
    figures: list[str],
) -> BaseModel:
    """Apply Markdown figure substitution to every string field of the poster
    model. Mutates ``poster`` in place and returns it."""
    for name in poster.model_fields:
        if not hasattr(poster, name):
            continue
        current = getattr(poster, name)
        if isinstance(current, str):
            setattr(poster, name, replace_figures_in_markdown(current, figures))
    return poster
92
+
93
+
94
def replace_figures_size_in_markdown(
    markdown: str,
    figures: list[str],
) -> str:
    """Annotate numeric figure placeholders with the decoded image's pixel
    width, height and aspect ratio (keeping the numeric index), so a layout
    LLM can reason about image sizes. Out-of-range indices are untouched."""

    def _annotate(match):
        idx = int(match.group(2))
        if not 0 <= idx < len(figures):
            return match.group(0)
        raw = base64.b64decode(figures[idx])
        width, height = Image.open(io.BytesIO(raw)).size
        return (
            f"![{match.group(1)}, width = {width}, height = {height}, "
            f"aspect ratio = {width / height:.4f}]({match.group(2)})"
        )

    return re.sub(r"!\[(.*?)\]\((\d+)\)", _annotate, markdown)
110
+
111
+
112
def replace_figures_size_in_poster(
    poster: BaseModel,
    figures: list[str],
) -> BaseModel:
    """Annotate figure placeholders in every string field of the poster model
    with pixel-size metadata. Mutates ``poster`` in place and returns it."""
    for name in poster.model_fields:
        if not hasattr(poster, name):
            continue
        current = getattr(poster, name)
        if isinstance(current, str):
            setattr(
                poster, name, replace_figures_size_in_markdown(current, figures)
            )
    return poster
122
+
123
+
124
def replace_figures_in_html(html: str, figures: list[str]) -> str:
    """Replace numeric ``src="idx"`` attributes with base64 PNG data URLs."""

    def _swap(match):
        idx = int(match.group(1))
        if not 0 <= idx < len(figures):
            return match.group(0)
        return f'src="data:image/png;base64,{figures[idx]}"'

    return re.sub(r"src=\"(\d+)\"", _swap, html)
134
+
135
+
136
def get_sizes(type: str, html: str) -> list[list[dict]]:
    """Render `html` headlessly and measure the column layout.

    For each ``.{type}-content`` element, returns a list of groups; each
    group is a list of ``{"width", "height"}`` bounding boxes — one per
    ``.{type}-column`` for flex groups, or a single box for a plain child.
    `type` is "poster" or "section" at the call sites in this module.

    Fix over the original: the browser is now closed in a ``finally`` block,
    so a failing Playwright call no longer leaks the Chromium process.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.set_content(html)

            contents = page.query_selector_all(f".{type}-content")
            content_sizes = []

            for content in contents:
                groups = content.query_selector_all("> *")
                group_sizes = []

                for group in groups:
                    is_group = group.evaluate(
                        f"element => element.classList.contains('{type}-group')"
                    )
                    if not is_group:
                        # Plain child: record it as a one-column pseudo-group.
                        bounding_box = group.bounding_box()
                        group_sizes.append(
                            [
                                {
                                    "width": bounding_box["width"],
                                    "height": bounding_box["height"],
                                }
                            ]
                        )
                        continue

                    # Top-align columns so each column's own height is measured,
                    # not the stretched flex height.
                    group.evaluate("(element) => element.style.alignItems = 'start'")

                    columns = group.query_selector_all(f".{type}-column")
                    column_sizes = []

                    for column in columns:
                        bounding_box = column.bounding_box()
                        column_sizes.append(
                            {
                                "width": bounding_box["width"],
                                "height": bounding_box["height"],
                            }
                        )

                    group_sizes.append(column_sizes)

                content_sizes.append(group_sizes)
        finally:
            browser.close()
        return content_sizes
186
+
187
+
188
def generate_html_v2(vendor: str, model: str, poster: BaseModel, figures: list[str]):
    """Generate poster HTML from a filled poster model via an LLM layout loop.

    The LLM produces a <body>; the result is rendered headlessly and its
    blank-space proportion measured. If more than 10% of the poster is blank,
    the previous body plus measured column sizes are fed back to the LLM for
    up to 5 attempts.

    Returns a dict with "html" (numeric figure indices) and
    "html_with_figures" (data-URL-inlined figures).
    Raises ValueError if the layout never reaches <= 10% blank space.

    NOTE(review): `llm` is only assigned when vendor == "openai"; any other
    vendor raises NameError below — confirm intended vendor set.
    """
    if vendor == "openai":
        # Reasoning models (o1/o3/o4) go through ChatOpenAI; everything else
        # through the BaseChatOpenAI wrapper.
        if "o1" in model or "o3" in model or "o4" in model:
            llm = ChatOpenAI(
                model=model,
                temperature=1,
                max_tokens=8000,
            )
        else:
            llm = BaseChatOpenAI(
                model=model,
                temperature=1,
                max_tokens=8000,
                # model_kwargs={
                #     "extra_body": {"chat_template_kwargs": {"enable_thinking": False}}
                # },
            )

    # Fixed CSS shared between the prompt (so the LLM can compute sizes) and
    # the final rendered document.
    style = """<style>
html {
  font-family: "Times New Roman", Times, serif;
  font-size: 16px;
}

body {
  width: 1280px;
  margin: 0;
}

ol,
ul {
  margin-left: 0.5rem;
}

li {
  margin-bottom: 0.5rem;
}

img {
  width: calc(100% - 2rem);
  margin: 0.5rem 1rem;
}

.poster-header {
  padding: 2rem;
  text-align: center;
}

.poster-title {
  margin-bottom: 1rem;
  font-size: 1.875rem;
  font-weight: bold;
}

.poster-author {
  margin-bottom: 0.5rem;
}

.poster-content {
  padding: 1rem;
}

.section {
  margin-bottom: 1rem;
}

.section-title {
  padding: 0.5rem 1rem;
  font-weight: bold;
}

.section-content {
  margin: 0 1rem;
}
</style>
"""

    layout_prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessage(
                content="You are a professional academic poster web page creator and your task is to generate the HTML code for a nicely laid out academic poster web page based on the object provided."
            ),
            HumanMessagePromptTemplate.from_template(
                """# Object Description
- The object contains several fields. Each field represents a section, except for the title, author and affiliation fields. The field name is the title of the section and the field value is the Markdown content of the section.
- The image in Markdown is given in the format ![alt_text, width = original_width, height = original_height, aspect ratio = aspect_ratio](image_index).

# HTML Structure
- Only generate the HTML code inside <body>, without any other things.
- Place title, author and affiliation inside <div class="poster-header">. Place title inside <div class="poster-title">, author inside <div class="poster-author"> and affiliation inside <div class="poster-affiliation">.
- Place content inside <div class="poster-content">.
- Place each section inside <div class="section">. Place section title inside <div class="section-title"> and section content inside <div class="section-content">.
- Use <p> for paragraphs.
- Use <ol> and <li> for ordered lists, and <ul> and <li> for unordered lists.
- Use <img src="image_index" alt="alt_text"> for images.
- Use <strong> for bold text and <em> for italic text.
- Do not use tags other than <div>, <p>, <ol>, <ul>, <li>, <img>, <strong>, <em>.
- Do not create any sections that are not in the object. Do not split or merge any existing sections.
- Sections and contents should be strictly equal to the object, and should be placed strictly in the order of the object.

# Color Specification
- Select at least 2 colors from the visual identity of the affiliation. If there are multiple affiliations, consider the most well-known one.
- For example, Tsinghua University uses #660874 and #d93379, Beihang University uses #005bac and #003da6, Zhejiang University uses #003f88 and #b01f24. These are just examples, you must pick colors from the actual visual identity of the affiliation.
- Add text and background color to poster header and section title using inline style. Use gradient to make the poster more beautiful.
- The text and background color of each section title should be the same.
- Do not add styles other than color, background, border, box-shadow.
- Do not add styles like width, height, padding, margin, font-size, font-weight, border-radius.

# Layout Specification
- Optionally, inside <div class="poster-content">, group sections into columns using <div class="poster-group" style="display: flex; gap: 1rem"> and <div class="poster-column" style="flex: 1">.
- You must determine the optimal number and flex grow value of columns to create a balanced poster layout. If one column becomes too tall, redistribute sections to other columns.
- There can be multiple groups with different number and flex grow of columns.
- Optionally, inside <div class="section-content">, group texts and images into columns using <div class="section-group" style="display: flex; gap: 0.5rem"> and <div class="section-column" style="flex: 1">.
- For example, if there are two images in two columns whose aspect ratios are 1.2 and 2 respectively, the flex grow of two columns should be 1.2 and 2 respectively, to make the columns have the same height.
- Calculate the size of each image based on column width and aspect ratios. Add comment <!-- width = display_width, height = display_height --> before each image.
- Rearrange the structure and order of sections, texts and images to make the height of each column in the same group approximately the same.
- For example, if there are too many images in one section that make the height of the column too large, group the images into columns.
- The display width of each image should not be too large or too small compared to its original width.
- DO NOT LEAVE MORE THAN 5% BLANK SPACE IN THE POSTER.
- Use a 3-column or 4-column layout with a landscape (horizontal) orientation for optimal visual presentation.

# Output Requirement
- Please output the result in the following format:
<think>
Think step by step, considering all structures and specifications listed above one by one.
Calculate the width and height of each column, text and image in detail, based on given style.
</think>
```html
HTML code inside <body>.
```
- Please make the content in <think> as detailed and comprehensive as possible.

# Existing Style
{style}

# Object
{poster}
"""
            ),
        ]
    )
    layout_chain = layout_prompt | llm
    # First attempt, without any feedback.
    output = layout_chain.invoke({"style": style, "poster": poster}).content
    # A placeholder is appended so subsequent retries can inject the
    # previous body + measured sizes as extra "react" messages.
    layout_prompt.append(
        MessagesPlaceholder(variable_name="react"),
    )

    HTML_TEMPLATE = """<!DOCTYPE html>
<html>
<head>
<title>Poster</title>
{style}
<script>
MathJax = {{ tex: {{ inlineMath: [["$", "$"]] }} }};
</script>
<script
id="MathJax-script"
async
src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js"
></script>
</head>
<body>
{body}
</body>
</html>
"""

    def get_content_sizes(sizes: list[list[dict]]) -> float:
        """Calculate the total content size from the sizes data structure"""
        return sum(
            column["width"] * column["height"]
            for content in sizes
            for group in content
            for column in group
        )

    def get_total_size(sizes: list[list[dict]]) -> float:
        """Calculate the total size including spacing from the sizes data structure"""
        return sum(
            (
                sum(column["width"] for column in group)
                * max((column["height"] for column in group), default=0)
            )
            for content in sizes
            for group in content
        )

    def calculate_blank_proportion(poster_sizes, section_sizes) -> float:
        """Calculate the proportion of blank space in the poster"""
        poster_content_sizes = get_content_sizes(poster_sizes)
        section_content_sizes = get_content_sizes(section_sizes)
        poster_total_size = get_total_size(poster_sizes)
        section_total_size = get_total_size(section_sizes)

        if poster_total_size == 0:
            return 1.0

        # Section-internal blank space (section_total - section_content) is
        # subtracted from the poster content so it still counts as blank.
        return (
            1.0
            - (poster_content_sizes - (section_total_size - section_content_sizes))
            / poster_total_size
        )

    max_attempts = 5
    attempt = 1

    while True:
        # NOTE(review): re.search returns None when the LLM omits the
        # ```html fence — this then raises AttributeError, not ValueError.
        body = re.search(r"```html\n(.*?)\n```", output, re.DOTALL).group(1)

        html = HTML_TEMPLATE.format(style=style, body=body)
        html_with_figures = replace_figures_in_html(html, figures)

        # Measure the rendered layout at both granularities.
        poster_sizes = get_sizes("poster", html_with_figures)
        section_sizes = get_sizes("section", html_with_figures)

        proportion = calculate_blank_proportion(poster_sizes, section_sizes)
        if proportion <= 0.1:
            print(
                f"Attempted {attempt} times, remaining {proportion:.0%} blank spaces."
            )
            return {"html": html, "html_with_figures": html_with_figures}

        attempt += 1
        if attempt > max_attempts:
            raise ValueError(f"Invalid blank spaces: {proportion:.0%}")

        # Feed the failed body and its measured sizes back for another try.
        react = [
            # AIMessage(""),
            HumanMessage(
                content=f"""# Previous Body
{body}

# Previous Size of Columns in Poster
{poster_sizes}

# Previous Size of Columns in Section
{section_sizes}

Now there are {proportion:.0%} blank spaces. Please regenerate the content to create a more balanced poster layout.
"""
            ),
        ]

        output = layout_chain.invoke(
            {"style": style, "poster": poster, "react": react}
        ).content
434
+
435
+
436
def take_screenshot(output: str, html: str):
    """Render `html` at 1280px width and save a full-page PNG.

    The PNG path is derived from `output` by swapping the .json suffix.
    Fix over the original: the browser is closed in a ``finally`` block so a
    rendering failure no longer leaks the Chromium process.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page(viewport={"width": 1280, "height": 100})
            page.set_content(html)
            page.screenshot(
                type="png", path=output.replace(".json", ".png"), full_page=True
            )
        finally:
            browser.close()
445
+
446
+
447
def replace_figures_in_svg(svg: str, figures: list[str]) -> str:
    """Replace numeric ``href="idx"`` attributes with base64 PNG data URLs."""

    def _swap(match):
        idx = int(match.group(1))
        if not 0 <= idx < len(figures):
            return match.group(0)
        return f'href="data:image/png;base64,{figures[idx]}"'

    return re.sub(r"href=\"(\d+)\"", _swap, svg)
457
+
458
+
459
def svg_to_png(output: str, svg: str):
    """Rasterize the SVG poster to a 7000px-wide PNG.

    The PNG path is derived from `output` by swapping the .json suffix.
    """
    png_path = output.replace(".json", ".png")
    cairosvg.svg2png(
        bytestring=svg.encode("utf-8"),
        write_to=png_path,
        output_width=7000,
    )
465
+
466
+
467
def replace_figures_in_latex(latex: str, figures: list[str]) -> str:
    """Point numeric ``\\includegraphics{idx}`` commands at the exported
    ``figure_<idx>.png`` files, preserving any bracketed options."""

    def _swap(match):
        idx = int(match.group(2))
        if not 0 <= idx < len(figures):
            return match.group(0)
        options = match.group(1) or ""
        return f"\\includegraphics{options}{{figure_{idx}.png}}"

    return re.sub(r"\\includegraphics(\[.*?\])?\{(\d+)\}", _swap, latex)
478
+
479
+
480
def latex_to_png(output: str, latex: str):
    """Compile the sibling .tex of `output` with pdflatex and save page 1 as PNG.

    NOTE(review): the `latex` argument is unused here — the .tex file appears
    to be written to disk beforehand; confirm against the callers.
    """
    # pdflatex exit status is deliberately not checked: in nonstopmode it
    # often exits non-zero even when a usable PDF was produced. A genuine
    # failure surfaces below when convert_from_path cannot find the PDF.
    subprocess.run(
        [
            "pdflatex",
            "-interaction=nonstopmode",
            f"-output-directory={os.path.dirname(output)}",
            output.replace(".json", ".tex"),
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Only the first page is rasterized — the poster is expected to be one page.
    images = convert_from_path(output.replace(".json", ".pdf"), dpi=300)
    images[0].save(output.replace(".json", ".png"))
493
+
494
+
495
def generate_poster_v3(
    vendor: str,
    model: str,
    text_prompt: str,
    figures_prompt: str,
    pdf: str,
    figures: list[str],
    figures_index: list[str],
) -> dict:
    """Build a structured poster from a paper PDF via a multi-stage LLM pipeline.

    Stages: (1) ask the LLM for the section outline, (2) describe every
    extracted figure, (3) generate a text-only poster, (4) insert figure
    index placeholders into the text poster.

    Args:
        vendor: LLM vendor; only "openai" assigns `llm` (see note below).
        model: Model name; o1/o3/o4 models use ChatOpenAI.
        text_prompt: Unused — shadowed by a local ChatPromptTemplate below.
        figures_prompt: Unused — shadowed by a local ChatPromptTemplate below.
        pdf: Path to the paper PDF.
        figures: Base64-encoded PNG figures extracted from the paper.
        figures_index: Unused in this function body.

    Returns:
        Dict with "sections", "figures" (with descriptions),
        "text_based_poster" and "image_based_poster" (pydantic models).

    NOTE(review): `llm` is only assigned for vendor == "openai"; other
    vendors raise NameError — confirm intended vendor set.
    """
    # Setup LLM
    if vendor == "openai":
        if "o1" in model or "o3" in model or "o4" in model:
            llm = ChatOpenAI(
                model=model,
                temperature=1,
                max_tokens=8000,
            )
        else:
            llm = BaseChatOpenAI(
                model=model,
                temperature=1,
                max_tokens=8000,
                # model_kwargs={
                #     "extra_body": {"chat_template_kwargs": {"enable_thinking": False}}
                # },
            )
    # Flatten the whole PDF into one text blob for the prompts below.
    loader = PyMuPDFLoader(pdf)
    pages = loader.load()
    paper_content = "\n".join([page.page_content for page in pages])

    from .compress import compress_image

    # NOTE(review): figure_messages is built but never used afterwards —
    # looks like a leftover; confirm before removing.
    figure_messages = [
        HumanMessagePromptTemplate(
            prompt=[
                ImagePromptTemplate(
                    input_variables=["figure"],
                    template={"url": "data:image/png;base64,{figure}"},
                ),
            ],
        ).format(figure=compress_image(figure, quality=85, max_size=(64, 64)))
        for figure in figures
    ]

    # Doubled braces keep the example literal when run through the
    # ChatPromptTemplate formatting below.
    json_format_example = """
```json
{{
    "Introduction": "Brief overview of the paper's main topic and objectives.",
    "Methodology": "Description of the methods used in the research.",
    "Results": "Summary of the key findings and results."
}}
```
"""
    # Stage 1: ask the LLM for a flat {section name: description} outline,
    # retrying up to 5 times until valid output is produced.
    sections = None
    for _ in range(5):
        section_prompt = ChatPromptTemplate.from_messages(
            [
                SystemMessage(content="You are an expert in academic paper analysis."),
                HumanMessagePromptTemplate.from_template(
                    """Please analyze the paper content and identify the key sections that should be included in the poster.
For each section, provide a concise description of what should be included. First, determine the paper type:
- For methodology research papers: Focus on method description, experimental results, and research methodology.
- For benchmark papers: Highlight task definitions, dataset construction, and evaluation outcomes.
- For survey/review papers: Emphasize field significance, key developmental milestones, critical theories/techniques, current challenges, and emerging trends.

Note that the specific section names should be derived from the paper's content. Related sections can be combined to avoid fragmentation. Limit the total number of sections to maintain clarity. Do not include acknowledgements or references sections.

Return the result as a flat JSON object with section names as keys and descriptions as values, without nested structures. You MUST use Markdown code block syntax with the json language specifier.

Example format:
{json_format_example}

Paper content:
{paper_content}
"""
                ),
            ]
        )
        sections_response = llm.invoke(
            section_prompt.format(
                json_format_example=json_format_example, paper_content=paper_content
            )
        )

        json_pattern = r"```json(.*?)```"
        match = re.search(json_pattern, sections_response.content, re.DOTALL)
        if match:
            json_content = match.group(1)
        else:
            continue

        try:
            # SECURITY(review): eval() on LLM output is arbitrary code
            # execution; json.loads (or ast.literal_eval) would be safer.
            sections = eval(json_content.strip())
            if all(
                isinstance(k, str) and isinstance(v, str) for k, v in sections.items()
            ):
                break
        except Exception:
            continue

    if sections is None:
        raise ValueError("Failed to retrieve valid sections from LLM response.")

    # Pydantic model whose fields mirror the discovered sections.
    DynamicPoster = create_dynamic_poster_model(sections)

    # Stage 2: describe each extracted figure with a (multimodal) LLM.
    figures_description_prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessage(
                content="You are an academic image analysis expert. Provide concise descriptions (under 100 words) of academic figures, diagrams, charts, or images. Identify what the figure displays, its likely purpose in academic literature, and highlight key data points or trends. Focus on clarity and academic relevance while maintaining precision in your analysis."
            ),
            HumanMessagePromptTemplate(
                prompt=[
                    # PromptTemplate(template="Describe this image:"),
                    ImagePromptTemplate(
                        input_variables=["image_data"],
                        template={"url": "data:image/png;base64,{image_data}"},
                    ),
                ],
            ),
        ]
    )

    # use_claude toggles an alternative multimodal model + a disk cache for
    # the figure descriptions; currently hard-disabled.
    use_claude = False
    mllm = BaseChatOpenAI(
        temperature=1,
        max_tokens=8000,
    )

    figures_with_descriptions = ""
    figure_list = []

    figures_description_cache = pdf.replace(".pdf", "_figures_description.json")
    if use_claude and os.path.exists(figures_description_cache):
        with open(figures_description_cache, "r") as f:
            figures_with_descriptions = f.read()
    else:
        figure_chain = figures_description_prompt | (mllm if use_claude else llm)
        for i, figure in enumerate(tqdm(figures, desc=f"处理图片 {pdf}")):
            figure_description_response = figure_chain.invoke({"image_data": figure})
            figures_with_descriptions += f"""
<figure_{i}>
{figure_description_response.content}
</figure_{i}>
"""
            figure_list.append(
                {"figure": figure, "description": figure_description_response.content}
            )
        if use_claude:
            with open(figures_description_cache, "w") as f:
                f.write(figures_with_descriptions)

    # Stage 3: generate the text-only poster (rebinds the unused
    # `text_prompt` parameter with a local template).
    text_prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessage(
                content="You are a helpful academic expert, who is specialized in generating a text-based paper poster, from given contents."
            ),
            HumanMessagePromptTemplate.from_template(
                """Below is the figures with descriptions in the paper:
<figures>
{figures}
</figures>

Below is the content of the paper:
<paper_content>
{paper_content}
</paper_content>

If figures can effectively convey the poster content, simplify the related text to avoid redundancy. Include essential mathematical formulas where they enhance understanding.

{format_instructions}

Ensure all sections are precise, concise, and presented in markdown format without headings."""
            ),
        ]
    )
    parser = PydanticOutputParser(pydantic_object=DynamicPoster)
    # Fallback parser: asks the LLM to repair malformed structured output.
    fixing_parser = OutputFixingParser.from_llm(parser=parser, llm=llm)
    text_prompt = text_prompt.partial(
        format_instructions=parser.get_format_instructions()
    )
    text_chain = text_prompt | llm | remove_think_tags | parser
    try:
        text_poster = text_chain.invoke(
            {"paper_content": paper_content, "figures": figures_with_descriptions}
        )
    except OutputParserException as e:
        text_poster = fixing_parser.parse(e.llm_output)

    # Stage 4: insert figure index placeholders (rebinds the unused
    # `figures_prompt` parameter with a local template).
    figures_prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessagePromptTemplate.from_template(
                "You are a helpful academic expert, who is specialized in generating a paper poster, from given contents and figures. "
            ),
            HumanMessagePromptTemplate.from_template(
                """Below is the figures with descriptions in the paper:
<figures>
{figures}
</figures>

I have already generated a text-based poster as follows:
<poster_content>
{poster_content}
</poster_content>

The paper content is as follows:
<paper_content>
{paper_content}
</paper_content>

Insert figures into the poster content using figure index notation as `![figure_description](figure_index)`. For example, `![Overview](0)`.
The figure_index MUST be an integer starting from 0, and no other text should be used in the figure_index position.
Each figure should be used at most once, with precise and accurate placement.
Prioritize pictures and tables based on their relevance and importance to the content.

{format_instructions}"""
            ),
        ]
    )
    figures_prompt = figures_prompt.partial(
        figures=figures_with_descriptions,
        format_instructions=parser.get_format_instructions(),
    )
    figures_chain = figures_prompt | llm | remove_think_tags | parser
    try:
        figures_poster = figures_chain.invoke(
            {"poster_content": text_poster, "paper_content": paper_content}
        )
    except OutputParserException as e:
        figures_poster = fixing_parser.parse(e.llm_output)

    return {
        "sections": sections,
        "figures": figure_list,
        "text_based_poster": text_poster,
        "image_based_poster": figures_poster,
    }
requirements.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ bert_score==0.3.13
2
+ fire==0.7.0
3
+ PyMuPDF  # provides the "fitz" module imported by poster/loader.py; the PyPI package literally named "fitz" is an unrelated placeholder
4
+ langchain==0.3.19
5
+ langchain_anthropic==0.3.8
6
+ langchain_community==0.3.18
7
+ langchain_core==0.3.40
8
+ langchain_openai==0.3.7
9
+ numpy==2.2.3
10
+ pdf2image==1.17.0
11
+ Pillow==11.1.0
12
+ pydantic==2.10.6
13
+ PyPDF2==3.0.1
14
+ Requests==2.32.3
15
+ retry==0.9.2
16
+ rouge==1.0.1
17
+ scikit-image  # "skimage==0.0" is an empty placeholder on PyPI; scikit-image is the real distribution
18
+ tqdm==4.67.1
19
+ gradio
20
+ cairosvg==2.7.1
21
+ playwright
start.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import glob
4
+ from main import generate_paper_poster
5
+ from tqdm import tqdm
6
+ import concurrent.futures
7
+
8
+
9
def process_papers(input_dir, output_dir, url, model):
    """Generate posters for every paper directory under `input_dir`.

    Each subdirectory is expected to contain a paper.pdf; outputs go to
    `output_dir/<paper_id>/poster.{json,png}`. Work is fanned out over a
    16-thread pool; already-generated posters are skipped.
    """
    os.makedirs(output_dir, exist_ok=True)

    pdf_files = [
        os.path.join(input_dir, entry, "paper.pdf")
        for entry in os.listdir(input_dir)
        if os.path.isdir(os.path.join(input_dir, entry))
    ]

    def process_single_pdf(pdf_file):
        """Generate one poster; failures are logged, never propagated."""
        try:
            file_id = os.path.basename(os.path.dirname(pdf_file))
            poster_dir = os.path.join(output_dir, file_id)
            os.makedirs(poster_dir, exist_ok=True)
            output_file = os.path.join(poster_dir, "poster.json")
            output_png = os.path.join(poster_dir, "poster.png")

            if os.path.exists(output_file) and os.path.exists(output_png):
                print(f"跳过已存在的文件: {output_file}")
                return

            generate_paper_poster(
                url=url,
                pdf=pdf_file,
                model=model,
                output=output_file,
                text_prompt=" ",
                figures_prompt=" ",
            )
            print(f"成功生成: {output_file}")

        except Exception as e:
            print(f"处理文件 {pdf_file} 时出错: {e}")

    with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
        futures = [
            executor.submit(process_single_pdf, pdf_file) for pdf_file in pdf_files
        ]

        # Drain completions through tqdm purely for progress display.
        for _ in tqdm(
            concurrent.futures.as_completed(futures),
            total=len(futures),
            desc=f"处理文件 {model}",
        ):
            pass
55
+
56
+
57
+ if __name__ == "__main__":
58
+ url = ""
59
+ input_dir = "eval/data"
60
+ models = []
61
+ for model in models:
62
+ output_dir = f"eval/temp-v2/{model.replace('/', '-')}"
63
+ process_papers(input_dir, output_dir, url, model)