Spaces:
Sleeping
Sleeping
edit preprocess
Browse files
app.py
CHANGED
|
@@ -28,10 +28,6 @@ with open('lemma_dict.pkl', 'rb') as f:
|
|
| 28 |
nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])
|
| 29 |
nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)
|
| 30 |
|
| 31 |
-
OOV_INDEX = 0
|
| 32 |
-
word_dict = {"<OOV>": OOV_INDEX} # OOV token at index 0.
|
| 33 |
-
word_index = 1
|
| 34 |
-
|
| 35 |
def preprocess_text(text):
|
| 36 |
"""Preprocess the input text using SpaCy and return word indices."""
|
| 37 |
docs = nlp.pipe([text], n_process=1)
|
|
@@ -40,7 +36,7 @@ def preprocess_text(text):
|
|
| 40 |
for token in doc:
|
| 41 |
if token.pos_ != "PUNCT":
|
| 42 |
if token.text not in word_dict:
|
| 43 |
-
word_dict[token.text] = OOV_INDEX
|
| 44 |
word_seq.append(word_dict[token.text])
|
| 45 |
return word_seq
|
| 46 |
|
|
|
|
| 28 |
nlp = spacy.load('en_core_web_lg', disable=['parser', 'ner', 'tagger'])
|
| 29 |
nlp.vocab.add_flag(lambda s: s.lower() in spacy.lang.en.stop_words.STOP_WORDS, spacy.attrs.IS_STOP)
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
def preprocess_text(text):
|
| 32 |
"""Preprocess the input text using SpaCy and return word indices."""
|
| 33 |
docs = nlp.pipe([text], n_process=1)
|
|
|
|
| 36 |
for token in doc:
|
| 37 |
if token.pos_ != "PUNCT":
|
| 38 |
if token.text not in word_dict:
|
| 39 |
+
word_dict[token.text] = 0 # OOV_INDEX
|
| 40 |
word_seq.append(word_dict[token.text])
|
| 41 |
return word_seq
|
| 42 |
|