File size: 1,936 Bytes
04aed77 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import logging
import os
import glob
from main import generate_paper_poster
from tqdm import tqdm
import concurrent.futures
def process_papers(input_dir, output_dir, url, model, max_workers=16):
    """Generate a poster for every ``<input_dir>/<paper_id>/paper.pdf``.

    Each immediate subdirectory of ``input_dir`` is treated as one paper.
    Its poster is requested via ``generate_paper_poster`` with the output
    path ``<output_dir>/<paper_id>/poster.json``. Papers are processed
    concurrently in a thread pool and progress is reported with ``tqdm``.

    Args:
        input_dir: Directory whose subdirectories each hold a ``paper.pdf``.
        output_dir: Root directory for results; created if missing.
        url: Passed through unchanged to ``generate_paper_poster``.
        model: Passed through unchanged to ``generate_paper_poster``; also
            shown in the progress-bar description.
        max_workers: Size of the thread pool. Defaults to 16.

    Returns:
        None. Per-paper failures are reported on stdout and do not abort
        the remaining papers.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Sort so that the submission order does not depend on the arbitrary
    # ordering returned by os.listdir.
    pdf_files = []
    for entry in sorted(os.listdir(input_dir)):
        paper_dir = os.path.join(input_dir, entry)
        if not os.path.isdir(paper_dir):
            continue
        pdf_file = os.path.join(paper_dir, "paper.pdf")
        if not os.path.isfile(pdf_file):
            # Without this check the worker would create an empty output
            # directory and then fail inside generate_paper_poster.
            logging.warning("Skipping %s: no paper.pdf found", paper_dir)
            continue
        pdf_files.append(pdf_file)

    if not pdf_files:
        # Nothing to do: avoid starting a pool and an empty progress bar.
        return

    def process_single_pdf(pdf_file):
        """Generate one poster, skipping papers that already have results."""
        try:
            file_id = os.path.basename(os.path.dirname(pdf_file))
            poster_dir = os.path.join(output_dir, file_id)
            os.makedirs(poster_dir, exist_ok=True)
            output_file = os.path.join(poster_dir, "poster.json")
            # NOTE(review): poster.png is presumably written next to the JSON
            # by generate_paper_poster -- confirm against that function.
            output_png = os.path.join(poster_dir, "poster.png")
            if os.path.exists(output_file) and os.path.exists(output_png):
                print(f"跳过已存在的文件: {output_file}")
                return
            generate_paper_poster(
                url=url,
                pdf=pdf_file,
                model=model,
                output=output_file,
                text_prompt=" ",
                figures_prompt=" ",
            )
            print(f"成功生成: {output_file}")
        except Exception as e:
            # Deliberate best-effort boundary: one failing paper must not
            # stop the rest of the batch.
            print(f"处理文件 {pdf_file} 时出错: {e}")

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_single_pdf, pdf_file) for pdf_file in pdf_files
        ]
        # The loop body is empty on purpose: iterating only advances the bar
        # as each future completes (errors are handled inside the worker).
        for _ in tqdm(
            concurrent.futures.as_completed(futures),
            total=len(futures),
            desc=f"处理文件 {model}",
        ):
            pass
if __name__ == "__main__":
    # Script entry point: run the batch once per model, writing each model's
    # results under its own directory ("/" in the model name becomes "-").
    # As written, `url` is empty and `models` is an empty list, so the loop
    # below performs no work until they are filled in.
    url = ""
    input_dir = "eval/data"
    models = []
    for model in models:
        process_papers(
            input_dir,
            f"eval/temp-v2/{model.replace('/', '-')}",
            url,
            model,
        )
|