mihalykiss committed on
Commit
24254c1
·
verified ·
1 Parent(s): c04032f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -38,14 +38,14 @@ def clean_text(text: str) -> str:
38
  return text
39
 
40
 
41
- newline_to_space = Replace(Regex(r'\s*\n\s*'), " ")
42
- join_hyphen_break = Replace(Regex(r'(\w+)[--]\s*\n\s*(\w+)'), r"\1\2")
43
 
44
  tokenizer.backend_tokenizer.normalizer = Sequence([
45
- tokenizer.backend_tokenizer.normalizer,
46
- join_hyphen_break,
47
- newline_to_space,
48
- Strip()
49
  ])
50
 
51
  def classify_text(text):
 
38
  return text
39
 
40
 
41
+ newline_to_space = Replace(Regex(r"\s*\n\s*"), " ")
42
+ join_hyphen_break = Replace(Regex(r"(\w+)-\s*\n\s*(\w+)"), r"\1\2")
43
 
44
  tokenizer.backend_tokenizer.normalizer = Sequence([
45
+ NFKC(),
46
+ join_hyphen_break,
47
+ newline_to_space,
48
+ Strip()
49
  ])
50
 
51
  def classify_text(text):