suntao.0 committed on
Commit
2fbe3fb
·
1 Parent(s): 1962d1d
Files changed (2) hide show
  1. requirements.txt +1 -1
  2. start.py +0 -63
requirements.txt CHANGED
@@ -2,7 +2,7 @@ cairosvg==2.8.2
2
  doclayout_yolo==0.0.3
3
  fastapi==0.115.12
4
  fire==0.7.0
5
- fitz==0.0.1.dev2
6
  gradio==5.32.1
7
  huggingface_hub==0.32.4
8
  langchain==0.3.25
 
2
  doclayout_yolo==0.0.3
3
  fastapi==0.115.12
4
  fire==0.7.0
5
+ PyMuPDF==1.24.1
6
  gradio==5.32.1
7
  huggingface_hub==0.32.4
8
  langchain==0.3.25
start.py DELETED
@@ -1,63 +0,0 @@
1
- import logging
2
- import os
3
- import glob
4
- from main import generate_paper_poster
5
- from tqdm import tqdm
6
- import concurrent.futures
7
-
8
-
9
- def process_papers(input_dir, output_dir, url, model):
10
- os.makedirs(output_dir, exist_ok=True)
11
-
12
- paper_files = os.listdir(input_dir)
13
- pdf_files = [
14
- os.path.join(input_dir, file, "paper.pdf")
15
- for file in paper_files
16
- if os.path.isdir(os.path.join(input_dir, file))
17
- ]
18
-
19
- def process_single_pdf(pdf_file):
20
- try:
21
- file_id = os.path.basename(os.path.dirname(pdf_file))
22
- poster_dir = os.path.join(output_dir, file_id)
23
- os.makedirs(poster_dir, exist_ok=True)
24
- output_file = os.path.join(poster_dir, "poster.json")
25
- output_png = os.path.join(poster_dir, "poster.png")
26
-
27
- if os.path.exists(output_file) and os.path.exists(output_png):
28
- print(f"跳过已存在的文件: {output_file}")
29
- return
30
-
31
- generate_paper_poster(
32
- url=url,
33
- pdf=pdf_file,
34
- model=model,
35
- output=output_file,
36
- text_prompt=" ",
37
- figures_prompt=" ",
38
- )
39
- print(f"成功生成: {output_file}")
40
-
41
- except Exception as e:
42
- print(f"处理文件 {pdf_file} 时出错: {e}")
43
-
44
- with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
45
- futures = [
46
- executor.submit(process_single_pdf, pdf_file) for pdf_file in pdf_files
47
- ]
48
-
49
- for _ in tqdm(
50
- concurrent.futures.as_completed(futures),
51
- total=len(futures),
52
- desc=f"处理文件 {model}",
53
- ):
54
- pass
55
-
56
-
57
- if __name__ == "__main__":
58
- url = ""
59
- input_dir = "eval/data"
60
- models = []
61
- for model in models:
62
- output_dir = f"eval/temp-v2/{model.replace('/', '-')}"
63
- process_papers(input_dir, output_dir, url, model)