om4r932 committed
Commit fd28bbf · Parent(s): fef4b3f

Remove useless
scripts/old/spec_doc_indexer_multi.py DELETED
@@ -1,278 +0,0 @@
-import datetime
-import time
-import sys
-import json
-import traceback
-import requests
-import zipfile
-import uuid
-import os
-import io
-import re
-import subprocess
-import concurrent.futures
-import threading
-from io import StringIO, BytesIO
-from typing import List, Dict, Any
-
-import pandas as pd
-import numpy as np
-import warnings
-
-warnings.filterwarnings("ignore")
-
-# Characters used to encode version components (base 36)
-chars = "0123456789abcdefghijklmnopqrstuvwxyz"
-
-# Locks for thread-safe operations
-print_lock = threading.Lock()
-dict_lock = threading.Lock()
-scope_lock = threading.Lock()
-
-# Global dictionaries
-indexed_specifications = {}
-documents_by_spec_num = {}
-processed_count = 0
-total_count = 0
-
-# Matches ToC entries such as "5.1.2<TAB>Some clause title"
-regex = r"^(\d+[a-z]?(?:\.\d+)*)\t[\ \S]+$"
-
-def get_text(specification: str, version: str):
-    """Download the spec archive and return the document text as a list of non-empty lines."""
-    doc_id = specification
-    series = doc_id.split(".")[0]
-
-    response = requests.get(
-        f"https://www.3gpp.org/ftp/Specs/archive/{series}_series/{doc_id}/{doc_id.replace('.', '')}-{version}.zip",
-        verify=False,
-        headers={"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
-    )
-
-    if response.status_code != 200:
-        raise Exception(f"ZIP download failed for {specification}-{version}")
-
-    zip_bytes = io.BytesIO(response.content)
-
-    with zipfile.ZipFile(zip_bytes) as zf:
-        for file_name in zf.namelist():
-            if file_name.endswith("zip"):
-                # Nested archive: open the inner ZIP and look for the document there
-                print("Another ZIP!")
-                zip_bytes = io.BytesIO(zf.read(file_name))
-                zf = zipfile.ZipFile(zip_bytes)
-                for file_name2 in zf.namelist():
-                    if file_name2.endswith("doc") or file_name2.endswith("docx"):
-                        if "cover" in file_name2.lower():
-                            print("Cover file, skipping!")
-                            continue
-                        ext = file_name2.split(".")[-1]
-                        doc_bytes = zf.read(file_name2)
-                        temp_id = str(uuid.uuid4())
-                        input_path = f"/tmp/{temp_id}.{ext}"
-                        output_path = f"/tmp/{temp_id}.txt"
-
-                        with open(input_path, "wb") as f:
-                            f.write(doc_bytes)
-
-                        # Convert the Word document to plain text with LibreOffice
-                        subprocess.run([
-                            "libreoffice",
-                            "--headless",
-                            "--convert-to", "txt",
-                            "--outdir", "/tmp",
-                            input_path
-                        ], check=True)
-
-                        with open(output_path, "r") as f:
-                            txt_data = [line.strip() for line in f if line.strip()]
-
-                        os.remove(input_path)
-                        os.remove(output_path)
-                        return txt_data
-            elif file_name.endswith("doc") or file_name.endswith("docx"):
-                if "cover" in file_name.lower():
-                    print("Cover file, skipping!")
-                    continue
-                ext = file_name.split(".")[-1]
-                doc_bytes = zf.read(file_name)
-                temp_id = str(uuid.uuid4())
-                input_path = f"/tmp/{temp_id}.{ext}"
-                output_path = f"/tmp/{temp_id}.txt"
-
-                print("Writing")
-                with open(input_path, "wb") as f:
-                    f.write(doc_bytes)
-
-                print("Converting")
-                subprocess.run([
-                    "libreoffice",
-                    "--headless",
-                    "--convert-to", "txt",
-                    "--outdir", "/tmp",
-                    input_path
-                ], check=True)
-
-                print("Writing TXT")
-                with open(output_path, "r", encoding="utf-8") as f:
-                    txt_data = [line.strip() for line in f if line.strip()]
-
-                os.remove(input_path)
-                os.remove(output_path)
-                return txt_data
-
-    raise Exception(f"No .doc/.docx file found in the ZIP for {specification}-{version}")
-
-def get_spec_content(specification: str, version: str):
-    """Split the spec text into chapters, keyed by their ToC heading."""
-    text = get_text(specification, version)
-    # The first "Foreword" hit is the ToC entry, the second is the heading itself
-    forewords = []
-    for x in range(len(text)):
-        line = text[x]
-        if "Foreword" in line:
-            forewords.append(x)
-        if len(forewords) >= 2:
-            break
-
-    toc_brut = text[forewords[1]:]
-    chapters = []
-    for line in toc_brut:
-        m = re.search(regex, line)
-        # Keep only headings that also appear in the ToC section
-        if m and any(line in c for c in text[forewords[0]:forewords[1]]):
-            chapters.append(line)
-            print(line)
-
-    real_toc_indexes = {}
-
-    for chapter in chapters:
-        x = text.index(chapter)
-        real_toc_indexes[chapter] = x
-
-    document = {}
-    toc = list(real_toc_indexes.keys())
-    index_toc = list(real_toc_indexes.values())
-    curr_index = 0
-    for x in range(1, len(toc)):
-        document[toc[curr_index].replace("\t", " ")] = re.sub(r"[\ \t]+", " ", "\n".join(text[index_toc[curr_index]+1:index_toc[x]]))
-        curr_index = x
-
-    document[toc[curr_index].replace("\t", " ")] = re.sub(r"\s+", " ", " ".join(text[index_toc[curr_index]+1:]))
-    print(len(toc)-1, toc[curr_index], curr_index)
-    return document
-
-def process_specification(spec: Dict[str, Any], columns: List[str]) -> None:
-    """Process a single specification (run from the thread pool)."""
-    global processed_count, indexed_specifications, documents_by_spec_num
-
-    try:
-        if spec.get('vers', None) is None:
-            return
-
-        doc_id = str(spec["spec_num"])
-        series = doc_id.split(".")[0]
-
-        a, b, c = str(spec["vers"]).split(".")
-
-        # Build the archive URL: version components up to 35 are encoded as one
-        # base-36 character each, larger ones as two decimal digits each
-        if not (int(a) > 35 or int(b) > 35 or int(c) > 35):
-            version_code = f"{chars[int(a)]}{chars[int(b)]}{chars[int(c)]}"
-        else:
-            version_code = f"{a.zfill(2)}{b.zfill(2)}{c.zfill(2)}"
-        spec_url = f"https://www.3gpp.org/ftp/Specs/archive/{series}_series/{doc_id}/{doc_id.replace('.', '')}-{version_code}.zip"
-
-        string = f"{spec['spec_num']}+-+{spec['title']}+-+{spec['type']}+-+{spec['vers']}+-+{spec['WG']}+-+Rel-{spec['vers'].split('.')[0]}"
-
-        metadata = {
-            "id": str(spec["spec_num"]),
-            "title": spec["title"],
-            "type": spec["type"],
-            "release": str(spec["vers"].split(".")[0]),
-            "version": str(spec["vers"]),
-            "working_group": spec["WG"],
-            "url": spec_url
-        }
-
-        # Update the global dictionary under lock
-        with dict_lock:
-            indexed_specifications[string] = metadata
-            processed_count += 1
-
-        # Report progress under lock
-        with print_lock:
-            sys.stdout.write(f"\rProcessing: {processed_count}/{total_count} specifications")
-            sys.stdout.flush()
-
-    except Exception as e:
-        with print_lock:
-            print(f"\nError while processing {spec.get('spec_num', 'unknown')}: {str(e)}")
-
-def main():
-    global total_count, documents_by_spec_num
-    start_time = time.time()
-
-    # Fetch the specification list from the 3GPP site
-    print("Fetching specifications from 3GPP...")
-    response = requests.get(
-        'https://www.3gpp.org/dynareport?code=status-report.htm',
-        headers={"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'},
-        verify=False
-    )
-
-    # Parse the HTML tables
-    dfs = pd.read_html(StringIO(response.text), encoding="utf-8")
-
-    for x in range(len(dfs)):
-        dfs[x] = dfs[x].replace({np.nan: None})
-
-    # Keep only the needed columns
-    columns_needed = [0, 1, 2, 3, 4]
-    extracted_dfs = [df.iloc[:, columns_needed] for df in dfs]
-    columns = [x.replace("\xa0", "_") for x in extracted_dfs[0].columns]
-
-    # Build one record per specification row
-    specifications = []
-    for df in extracted_dfs:
-        for index, row in df.iterrows():
-            doc = row.to_list()
-            doc_dict = dict(zip(columns, doc))
-            specifications.append(doc_dict)
-
-    total_count = len(specifications)
-    print(f"Processing {total_count} specifications with multithreading...")
-
-    try:
-        # Reuse a previously built document archive if one exists
-        if os.path.exists("indexed_docs_content.zip"):
-            with zipfile.ZipFile("indexed_docs_content.zip") as zf:
-                for file_name in zf.namelist():
-                    if file_name.endswith(".json"):
-                        doc_bytes = zf.read(file_name)
-                        documents_by_spec_num = json.loads(doc_bytes.decode("utf-8"))
-                        print(f"Loaded {len(documents_by_spec_num)} documents from the cache.")
-
-        # Process the specifications with a thread pool
-        with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
-            futures = [executor.submit(process_specification, spec, columns) for spec in specifications]
-            concurrent.futures.wait(futures)
-
-    finally:
-        json_str = json.dumps(documents_by_spec_num, indent=4, ensure_ascii=False)
-        json_bytes = json_str.encode("utf-8")
-        with zipfile.ZipFile("indexed_docs_content.zip", "w", compression=zipfile.ZIP_DEFLATED) as archive:
-            archive.writestr("indexed_documents.json", json_bytes)
-        elapsed_time = time.time() - start_time
-        print(f"\nFinished in {elapsed_time:.2f} seconds")
-        print("Results saved to the ZIP archive")
-
-if __name__ == "__main__":
-    main()

scripts/old/spec_indexer_multi.py DELETED
@@ -1,294 +0,0 @@
-import datetime
-import time
-import sys
-import json
-import traceback
-import requests
-import zipfile
-import uuid
-import os
-import re
-import subprocess
-import concurrent.futures
-import threading
-from io import StringIO, BytesIO
-from typing import List, Dict, Any
-
-import pandas as pd
-import numpy as np
-import warnings
-
-warnings.filterwarnings("ignore")
-
-# Characters used to encode version components (base 36)
-chars = "0123456789abcdefghijklmnopqrstuvwxyz"
-
-# Locks for thread-safe operations
-print_lock = threading.Lock()
-dict_lock = threading.Lock()
-scope_lock = threading.Lock()
-
-# Global dictionaries
-indexed_specifications = {}
-scopes_by_spec_num = {}
-processed_count = 0
-total_count = 0
-
-def get_text(specification: str, version: str):
-    """Download the spec archive and return the document text as a list of non-empty lines."""
-    doc_id = specification
-    series = doc_id.split(".")[0]
-
-    response = requests.get(
-        f"https://www.3gpp.org/ftp/Specs/archive/{series}_series/{doc_id}/{doc_id.replace('.', '')}-{version}.zip",
-        verify=False,
-        headers={"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
-    )
-
-    if response.status_code != 200:
-        raise Exception(f"ZIP download failed for {specification}-{version}")
-
-    zip_bytes = BytesIO(response.content)
-
-    with zipfile.ZipFile(zip_bytes) as zf:
-        for file_name in zf.namelist():
-            if file_name.endswith("zip"):
-                # Nested archive: open the inner ZIP and look for the document there
-                print("Another ZIP!")
-                zip_bytes = BytesIO(zf.read(file_name))
-                zf = zipfile.ZipFile(zip_bytes)
-                for file_name2 in zf.namelist():
-                    if file_name2.endswith("doc") or file_name2.endswith("docx"):
-                        if "cover" in file_name2.lower():
-                            print("Cover file, skipping!")
-                            continue
-                        ext = file_name2.split(".")[-1]
-                        doc_bytes = zf.read(file_name2)
-                        temp_id = str(uuid.uuid4())
-                        input_path = f"/tmp/{temp_id}.{ext}"
-                        output_path = f"/tmp/{temp_id}.txt"
-
-                        with open(input_path, "wb") as f:
-                            f.write(doc_bytes)
-
-                        # Convert the Word document to plain text with LibreOffice
-                        subprocess.run([
-                            "libreoffice",
-                            "--headless",
-                            "--convert-to", "txt",
-                            "--outdir", "/tmp",
-                            input_path
-                        ], check=True)
-
-                        with open(output_path, "r") as f:
-                            txt_data = [line.strip() for line in f if line.strip()]
-
-                        os.remove(input_path)
-                        os.remove(output_path)
-                        return txt_data
-            elif file_name.endswith("doc") or file_name.endswith("docx"):
-                if "cover" in file_name.lower():
-                    print("Cover file, skipping!")
-                    continue
-                ext = file_name.split(".")[-1]
-                doc_bytes = zf.read(file_name)
-                temp_id = str(uuid.uuid4())
-                input_path = f"/tmp/{temp_id}.{ext}"
-                output_path = f"/tmp/{temp_id}.txt"
-
-                print("Writing")
-                with open(input_path, "wb") as f:
-                    f.write(doc_bytes)
-
-                print("Converting")
-                subprocess.run([
-                    "libreoffice",
-                    "--headless",
-                    "--convert-to", "txt",
-                    "--outdir", "/tmp",
-                    input_path
-                ], check=True)
-
-                print("Writing TXT")
-                with open(output_path, "r", encoding="utf-8") as f:
-                    txt_data = [line.strip() for line in f if line.strip()]
-
-                os.remove(input_path)
-                os.remove(output_path)
-                return txt_data
-
-    raise Exception(f"No .doc/.docx file found in the ZIP for {specification}-{version}")
-
-def get_scope(specification: str, version: str):
-    """Extract the text of the Scope clause (between the Scope and References headings)."""
-    try:
-        spec_text = get_text(specification, version)
-        scp_i = 0
-        nxt_i = 0
-        for x in range(len(spec_text)):
-            text = spec_text[x]
-            if re.search(r"scope$", text, flags=re.IGNORECASE):
-                scp_i = x
-                nxt_i = scp_i + 10
-            if re.search(r"references$", text, flags=re.IGNORECASE):
-                nxt_i = x
-
-        # Return the clause text only when the extracted slice is non-trivial
-        return "Not found" if len(spec_text[scp_i+1:nxt_i]) < 2 else re.sub(r"\s+", " ", " ".join(spec_text[scp_i+1:nxt_i]))
-    except Exception as e:
-        traceback.print_exception(e)
-        return "Not found (error)"
-
-def process_specification(spec: Dict[str, Any], columns: List[str]) -> None:
-    """Process a single specification (run from the thread pool)."""
-    global processed_count, indexed_specifications, scopes_by_spec_num
-
-    try:
-        if spec.get('vers', None) is None:
-            return
-
-        doc_id = str(spec["spec_num"])
-        series = doc_id.split(".")[0]
-
-        a, b, c = str(spec["vers"]).split(".")
-
-        # Build the archive URL: version components up to 35 are encoded as one
-        # base-36 character each, larger ones as two decimal digits each
-        if not (int(a) > 35 or int(b) > 35 or int(c) > 35):
-            version_code = f"{chars[int(a)]}{chars[int(b)]}{chars[int(c)]}"
-        else:
-            version_code = f"{a.zfill(2)}{b.zfill(2)}{c.zfill(2)}"
-        spec_url = f"https://www.3gpp.org/ftp/Specs/archive/{series}_series/{doc_id}/{doc_id.replace('.', '')}-{version_code}.zip"
-
-        string = f"{spec['spec_num']}+-+{spec['title']}+-+{spec['type']}+-+{spec['vers']}+-+{spec['WG']}+-+Rel-{spec['vers'].split('.')[0]}"
-
-        metadata = {
-            "id": str(spec["spec_num"]),
-            "title": spec["title"],
-            "type": spec["type"],
-            "release": str(spec["vers"].split(".")[0]),
-            "version": str(spec["vers"]),
-            "working_group": spec["WG"],
-            "url": spec_url
-        }
-
-        # Check whether a scope was already extracted for this spec number
-        spec_num = str(spec["spec_num"])
-
-        with scope_lock:
-            if spec_num in scopes_by_spec_num:
-                # Reuse the cached scope
-                metadata["scope"] = scopes_by_spec_num[spec_num]
-                with print_lock:
-                    print(f"\nReusing scope for {spec_num}")
-            else:
-                # Extract the scope only when needed
-                version_for_scope = version_code
-
-                with print_lock:
-                    print(f"\nExtracting scope for {spec_num} (version {version_for_scope})")
-
-                try:
-                    scope = get_scope(metadata["id"], version_for_scope)
-                    # Cache the scope for later occurrences of this spec number
-                    scopes_by_spec_num[spec_num] = scope
-                    metadata["scope"] = scope
-                except Exception as e:
-                    error_msg = f"Error while extracting the scope: {str(e)}"
-                    metadata["scope"] = error_msg
-                    scopes_by_spec_num[spec_num] = error_msg
-
-        # Update the global dictionary under lock
-        with dict_lock:
-            # Append the scope to the key only when it was actually found
-            if metadata['scope'].strip() and "not found" not in metadata['scope'].lower():
-                string += f"+-+{metadata['scope']}"
-            indexed_specifications[string] = metadata
-            processed_count += 1
-
-        # Report progress under lock
-        with print_lock:
-            sys.stdout.write(f"\rProcessing: {processed_count}/{total_count} specifications")
-            sys.stdout.flush()
-
-    except Exception as e:
-        with print_lock:
-            print(f"\nError while processing {spec.get('spec_num', 'unknown')}: {str(e)}")
-
-def main():
-    global total_count, scopes_by_spec_num
-    before = 0  # specs already present in the cache, if any
-
-    start_time = time.time()
-
-    # Fetch the specification list from the 3GPP site
-    print("Fetching specifications from 3GPP...")
-    response = requests.get(
-        'https://www.3gpp.org/dynareport?code=status-report.htm',
-        headers={"User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'},
-        verify=False
-    )
-
-    # Parse the HTML tables
-    dfs = pd.read_html(StringIO(response.text), encoding="utf-8")
-
-    for x in range(len(dfs)):
-        dfs[x] = dfs[x].replace({np.nan: None})
-
-    # Keep only the needed columns
-    columns_needed = [0, 1, 2, 3, 4]
-    extracted_dfs = [df.iloc[:, columns_needed] for df in dfs]
-    columns = [x.replace("\xa0", "_") for x in extracted_dfs[0].columns]
-
-    # Build one record per specification row
-    specifications = []
-    for df in extracted_dfs:
-        for index, row in df.iterrows():
-            doc = row.to_list()
-            doc_dict = dict(zip(columns, doc))
-            specifications.append(doc_dict)
-
-    total_count = len(specifications)
-    print(f"Processing {total_count} specifications with multithreading...")
-
-    try:
-        # Reuse previously extracted scopes if a cache file exists
-        if os.path.exists("indexed_specifications.json"):
-            with open("indexed_specifications.json", "r", encoding="utf-8") as f:
-                f_up = json.load(f)
-                scopes_by_spec_num = f_up['scopes']
-                before = len(f_up['specs'])
-                print(f"Loaded {len(scopes_by_spec_num)} scopes from the cache.")
-
-        # Process the specifications with a thread pool
-        with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
-            futures = [executor.submit(process_specification, spec, columns) for spec in specifications]
-            concurrent.futures.wait(futures)
-
-    finally:
-        # Save the results
-        result = {
-            "specs": indexed_specifications,
-            "scopes": scopes_by_spec_num,
-            "last_indexed_date": datetime.datetime.today().strftime("%d-%m-%Y")
-        }
-
-        with open("indexed_specifications.json", "w", encoding="utf-8") as f:
-            json.dump(result, f, indent=4, ensure_ascii=False)
-
-        elapsed_time = time.time() - start_time
-        print(f"\nFinished in {elapsed_time:.2f} seconds")
-        print(f"New specifications: {len(indexed_specifications) - before}")
-        print("Results saved to indexed_specifications.json")
-
-if __name__ == "__main__":
-    main()