File size: 1,936 Bytes
04aed77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import logging
import os
import glob
from main import generate_paper_poster
from tqdm import tqdm
import concurrent.futures


def process_papers(input_dir, output_dir, url, model, max_workers=16):
    """Generate a poster for every paper directory found under ``input_dir``.

    Every immediate subdirectory of ``input_dir`` is treated as one paper and
    is expected to contain a ``paper.pdf``. For each of them
    ``generate_paper_poster`` is invoked with
    ``<output_dir>/<subdirectory name>/poster.json`` as its output path. The
    jobs run on a thread pool and a ``tqdm`` bar tracks completion.

    Args:
        input_dir: Directory whose subdirectories each hold a ``paper.pdf``.
        output_dir: Root directory for the results; created if missing.
        url: Forwarded unchanged to ``generate_paper_poster`` as ``url``.
        model: Forwarded as ``model``; also used in the progress-bar label.
        max_workers: Number of worker threads in the pool (default 16).

    Returns:
        None. A failure on one paper is printed and does not stop the others.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so the submission order does not depend on the order in which
    # the filesystem happens to list the entries.
    pdf_files = [
        os.path.join(input_dir, entry, "paper.pdf")
        for entry in sorted(os.listdir(input_dir))
        if os.path.isdir(os.path.join(input_dir, entry))
    ]

    def process_single_pdf(pdf_file):
        """Produce the poster for one ``paper.pdf``, reporting the outcome."""
        try:
            paper_dir = os.path.dirname(pdf_file)

            # Guard before touching the output tree: a subdirectory without a
            # paper.pdf should not leave an empty result folder behind or
            # reach the generator at all.
            if not os.path.isfile(pdf_file):
                print(f"跳过缺少 paper.pdf 的目录: {paper_dir}")
                return

            file_id = os.path.basename(paper_dir)
            poster_dir = os.path.join(output_dir, file_id)
            os.makedirs(poster_dir, exist_ok=True)
            output_file = os.path.join(poster_dir, "poster.json")
            # NOTE(review): poster.png is presumably written next to
            # poster.json by generate_paper_poster — confirm in main.py.
            output_png = os.path.join(poster_dir, "poster.png")

            # Only skip when both artifacts exist, so a half-finished earlier
            # run is regenerated.
            if os.path.exists(output_file) and os.path.exists(output_png):
                print(f"跳过已存在的文件: {output_file}")
                return

            generate_paper_poster(
                url=url,
                pdf=pdf_file,
                model=model,
                output=output_file,
                text_prompt=" ",
                figures_prompt=" ",
            )
            print(f"成功生成: {output_file}")

        except Exception as e:
            # Deliberate best-effort boundary: report and move on so one bad
            # paper cannot abort the whole batch.
            print(f"处理文件 {pdf_file} 时出错: {e}")

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [
            executor.submit(process_single_pdf, pdf_file) for pdf_file in pdf_files
        ]

        # The workers handle their own exceptions, so the loop only needs to
        # drain the futures to advance the progress bar.
        for _ in tqdm(
            concurrent.futures.as_completed(futures),
            total=len(futures),
            desc=f"处理文件 {model}",
        ):
            pass


if __name__ == "__main__":
    # Batch entry point: run the poster generation once per listed model.
    # `url` and `models` are empty here, so as written the loop does nothing.
    url = ""
    input_dir = "eval/data"
    models = []
    for model in models:
        # Model identifiers may contain "/", which must not create nested
        # directories in the output path.
        safe_model = model.replace("/", "-")
        output_dir = f"eval/temp-v2/{safe_model}"
        process_papers(
            input_dir=input_dir,
            output_dir=output_dir,
            url=url,
            model=model,
        )