File size: 896 Bytes
a2682b3 97e0c69 a2682b3 e00c07d a2682b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
from typing import List, Tuple, Union

import nltk
import spacy
class NLPModel:
    """Named-entity extraction and sentence splitting backed by spaCy and NLTK.

    Instances are callable: ``model(text)`` is shorthand for
    ``model.extract_entities(text)``.
    """

    def __init__(self, model_name: str = "pt_core_news_md"):
        """Load the spaCy pipeline and make sure NLTK's Punkt data is available.

        Args:
            model_name: Name of the spaCy pipeline passed to ``spacy.load``.
                Defaults to the Portuguese ``pt_core_news_md`` pipeline.
        """
        self.nlp = spacy.load(model_name)
        # Only trigger a download when the tokenizer data is actually missing,
        # rather than contacting the NLTK server on every instantiation.
        try:
            nltk.data.find("tokenizers/punkt")
        except LookupError:
            nltk.download("punkt")
        # NOTE(review): newer NLTK releases appear to look up "punkt_tab"
        # instead of "punkt" -- confirm which resource the deployed version
        # of NLTK expects.

    def __call__(self, text: Union[str, List[str]]) -> List[Tuple[str, str]]:
        """Make the model callable; delegates to :meth:`extract_entities`."""
        return self.extract_entities(text)

    @staticmethod
    def _doc_entities(doc) -> List[Tuple[str, str]]:
        """Return ``(lower-cased entity text, entity label)`` pairs of one doc."""
        return [(ent.text.lower(), ent.label_) for ent in doc.ents]

    def extract_entities(
        self, text: Union[str, List[str]]
    ) -> List[Tuple[str, str]]:
        """Extract named entities from a single text or a batch of sentences.

        Args:
            text: Either one string, or a list/tuple of strings. For a batch,
                the entities of all items are concatenated in input order.

        Returns:
            A flat list of ``(entity_text_lowercased, entity_label)`` tuples.
        """
        if isinstance(text, (list, tuple)):
            entities: List[Tuple[str, str]] = []
            # ``pipe`` lets the pipeline process the sentences as a batch
            # instead of paying the per-call overhead for every item.
            for doc in self.nlp.pipe(text):
                entities.extend(self._doc_entities(doc))
            return entities
        # Anything that is not a batch is handed to the pipeline unchanged.
        return self._doc_entities(self.nlp(text))

    def tokenize_sentences(
        self, text: str, language: str = "portuguese"
    ) -> List[str]:
        """Split ``text`` into sentences using NLTK's ``sent_tokenize``.

        Args:
            text: Raw text to split.
            language: Name of the Punkt model to use. Defaults to Portuguese
                so the split matches the default spaCy pipeline; pass
                ``"english"`` to get NLTK's own default model.

        Returns:
            The list of sentences produced by ``nltk.sent_tokenize``.
        """
        return nltk.sent_tokenize(text, language=language)