Update chunking.py (#5)
Browse files
- Update chunking.py (04a10ad976b8d025af1b6047c3a2e92de9f3c74c)
Co-authored-by: Manas Korada <mkorada@users.noreply.huggingface.co>
- chunking.py +9 -1
chunking.py
CHANGED
@@ -15,9 +15,17 @@ def get_max_word_length(target_languages: list[str]) -> int:
|
|
15 |
"cs": 140,
|
16 |
"da": 140,
|
17 |
"de": 150,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
}
|
19 |
|
20 |
-
max_word_length = 700 # Default
|
21 |
|
22 |
for lang in target_languages:
|
23 |
if lang in helsinki_word_limits:
|
|
|
15 |
"cs": 140,
|
16 |
"da": 140,
|
17 |
"de": 150,
|
18 |
+
"pl": 130, # New Languages Start Here Need To Test
|
19 |
+
"mk": 60,
|
20 |
+
"es": 50,
|
21 |
+
"it": 60,
|
22 |
+
"is": 90,
|
23 |
+
"tr": 60,
|
24 |
+
"sl": 150,
|
25 |
+
"no": 50,
|
26 |
}
|
27 |
|
28 |
+
max_word_length = 700 # Default Chunk Size
|
29 |
|
30 |
for lang in target_languages:
|
31 |
if lang in helsinki_word_limits:
|