Lucas ARRIESSE committed on
Commit
dc794b1
·
1 Parent(s): 4633840

Hotfix: change max DL rate limit 120 -> 60 per minute and limit parallel downloads to 4 workers

Browse files
Files changed (1) hide show
  1. api/docs.py +21 -18
api/docs.py CHANGED
@@ -106,35 +106,38 @@ async def convert_file(contents: io.BytesIO, filename: str, input_ext: str, outp
106
 
107
 
108
  # Rate limit of FTP downloads per minute
109
- FTP_DOWNLOAD_RATE_LIMITER = AsyncLimiter(max_rate=120, time_period=60)
 
 
110
 
111
 
112
  async def get_doc_archive(url: str, client: AsyncClient) -> tuple[str, str, io.BytesIO]:
113
  """Récupère le docx depuis l'URL et le retourne un tuple (nom, extension, contenu)"""
114
 
115
  async with FTP_DOWNLOAD_RATE_LIMITER:
116
- if not url.endswith("zip"):
117
- raise ValueError("URL doit pointer vers un fichier ZIP")
 
118
 
119
- doc_id = os.path.splitext(os.path.basename(url))[0]
120
- resp = await client.get(url, headers={
121
- "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
122
- })
123
 
124
- resp.raise_for_status()
125
 
126
- with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
127
- # there should be a single file per file
128
- for entry in zf.infolist():
129
- if entry.is_dir():
130
- continue
131
 
132
- file_name = entry.filename
133
- root, ext = os.path.splitext(file_name)
134
- doc_bytes = zf.read(file_name)
135
- return (root, ext.lower(), io.BytesIO(doc_bytes))
136
 
137
- raise ValueError("Aucun fichier trouvé dans l'archive")
138
 
139
 
140
  def apply_docx_revisions(docx_zip: zipfile.ZipFile) -> io.BytesIO:
 
106
 
107
 
108
  # Rate limit of FTP downloads per minute
109
+ FTP_DOWNLOAD_RATE_LIMITER = AsyncLimiter(max_rate=60, time_period=60)
110
+ # Max number of parallel workers downloading
111
+ FTP_MAX_PARALLEL_WORKERS = asyncio.Semaphore(4)
112
 
113
 
114
  async def get_doc_archive(url: str, client: AsyncClient) -> tuple[str, str, io.BytesIO]:
115
  """Récupère le docx depuis l'URL et le retourne un tuple (nom, extension, contenu)"""
116
 
117
  async with FTP_DOWNLOAD_RATE_LIMITER:
118
+ async with FTP_MAX_PARALLEL_WORKERS:
119
+ if not url.endswith("zip"):
120
+ raise ValueError("URL doit pointer vers un fichier ZIP")
121
 
122
+ doc_id = os.path.splitext(os.path.basename(url))[0]
123
+ resp = await client.get(url, headers={
124
+ "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
125
+ })
126
 
127
+ resp.raise_for_status()
128
 
129
+ with zipfile.ZipFile(io.BytesIO(resp.content)) as zf:
130
+ # there should be a single file per archive
131
+ for entry in zf.infolist():
132
+ if entry.is_dir():
133
+ continue
134
 
135
+ file_name = entry.filename
136
+ root, ext = os.path.splitext(file_name)
137
+ doc_bytes = zf.read(file_name)
138
+ return (root, ext.lower(), io.BytesIO(doc_bytes))
139
 
140
+ raise ValueError("Aucun fichier trouvé dans l'archive")
141
 
142
 
143
  def apply_docx_revisions(docx_zip: zipfile.ZipFile) -> io.BytesIO: