|
import logging |
|
import os |
|
import glob |
|
from main import generate_paper_poster |
|
from tqdm import tqdm |
|
import concurrent.futures |
|
|
|
|
|
def process_papers(input_dir, output_dir, url, model, *, max_workers=16):
    """Generate a poster for every paper directory found under ``input_dir``.

    Each immediate subdirectory of ``input_dir`` is treated as one paper and
    its PDF is looked up at ``<subdirectory>/paper.pdf``. The poster JSON for
    a paper is requested at ``<output_dir>/<subdirectory name>/poster.json``.
    Papers are processed concurrently in a thread pool while a tqdm progress
    bar tracks how many have finished.

    Args:
        input_dir: Directory whose subdirectories each represent one paper.
        output_dir: Root directory for generated posters; created if missing.
        url: Forwarded unchanged to ``generate_paper_poster`` as ``url``.
        model: Forwarded unchanged to ``generate_paper_poster`` as ``model``;
            also shown in the progress-bar description.
        max_workers: Number of worker threads in the pool. Defaults to 16.

    Returns:
        None. A failure on one paper is reported and does not stop the batch.
    """
    os.makedirs(output_dir, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # submission order deterministic from run to run.
    pdf_files = [
        os.path.join(input_dir, entry, "paper.pdf")
        for entry in sorted(os.listdir(input_dir))
        if os.path.isdir(os.path.join(input_dir, entry))
    ]

    def process_single_pdf(pdf_file):
        """Generate the poster for one PDF, reporting failures instead of raising."""
        # Messages are emitted with tqdm.write() rather than print() so that
        # output coming from worker threads does not break up the live
        # progress bar.
        try:
            # The paper id is the name of the directory that holds paper.pdf.
            file_id = os.path.basename(os.path.dirname(pdf_file))
            poster_dir = os.path.join(output_dir, file_id)
            os.makedirs(poster_dir, exist_ok=True)
            output_file = os.path.join(poster_dir, "poster.json")
            # poster.png is not written here; presumably generate_paper_poster
            # produces it next to the JSON -- TODO confirm.
            output_png = os.path.join(poster_dir, "poster.png")

            # Resume support: only skip when both artifacts are already there.
            if os.path.exists(output_file) and os.path.exists(output_png):
                tqdm.write(f"跳过已存在的文件: {output_file}")
                return

            generate_paper_poster(
                url=url,
                pdf=pdf_file,
                model=model,
                output=output_file,
                text_prompt=" ",
                figures_prompt=" ",
            )
            tqdm.write(f"成功生成: {output_file}")
        except Exception as e:
            # Worker boundary: one bad paper must not abort the whole batch,
            # so the error is reported and the worker simply finishes.
            tqdm.write(f"处理文件 {pdf_file} 时出错: {e}")

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_single_pdf, pdf_file) for pdf_file in pdf_files
        ]

        # Draining as_completed() is what advances the progress bar; the
        # workers handle their own errors, so there is no result to collect.
        for _ in tqdm(
            concurrent.futures.as_completed(futures),
            total=len(futures),
            desc=f"处理文件 {model}",
        ):
            pass
|
|
|
|
|
if __name__ == "__main__":
    # Batch configuration. The endpoint and the model list are empty in this
    # file, so running the script as-is processes nothing.
    url = ""
    input_dir = "eval/data"
    models = []

    for model_name in models:
        # "/" in a model id would otherwise create nested output directories.
        safe_name = model_name.replace("/", "-")
        process_papers(input_dir, f"eval/temp-v2/{safe_name}", url, model_name)
|
|