seanp03 mkorada committed on
Commit
74b0636
·
verified ·
1 Parent(s): 282b03d

Update chunking.py (#5)

Browse files

- Update chunking.py (04a10ad976b8d025af1b6047c3a2e92de9f3c74c)


Co-authored-by: Manas Korada <mkorada@users.noreply.huggingface.co>

Files changed (1) hide show
  1. chunking.py +9 -1
chunking.py CHANGED
@@ -15,9 +15,17 @@ def get_max_word_length(target_languages: list[str]) -> int:
15
  "cs": 140,
16
  "da": 140,
17
  "de": 150,
 
 
 
 
 
 
 
 
18
  }
19
 
20
- max_word_length = 700 # Default for non-Helsinki languages
21
 
22
  for lang in target_languages:
23
  if lang in helsinki_word_limits:
 
15
  "cs": 140,
16
  "da": 140,
17
  "de": 150,
18
+ "pl": 130, # New Languages Start Here Need To Test
19
+ "mk": 60,
20
+ "es": 50,
21
+ "it": 60,
22
+ "is": 90,
23
+ "tr": 60,
24
+ "sl": 150,
25
+ "no": 50,
26
  }
27
 
28
+ max_word_length = 700 # Default Chunk Size
29
 
30
  for lang in target_languages:
31
  if lang in helsinki_word_limits: