File size: 553 Bytes
a2682b3
 
 
 
 
 
 
97e0c69
 
 
 
a2682b3
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import spacy
import nltk

class NLPModel:
    """Pair a spaCy pipeline with NLTK sentence splitting for one language.

    The spaCy pipeline performs named-entity extraction; NLTK's Punkt
    tokenizer performs sentence splitting. Both default to Portuguese so
    that the two halves of the model agree on the language of the text.

    Attributes:
        nlp: The loaded spaCy pipeline.
        language: Language name handed to ``nltk.sent_tokenize``.
    """

    # (lookup path, downloadable package id) pairs for the Punkt data.
    # Older NLTK releases read "punkt"; newer ones read "punkt_tab", so both
    # are ensured to keep ``tokenize_sentences`` working across versions.
    _PUNKT_RESOURCES = (
        ("tokenizers/punkt", "punkt"),
        ("tokenizers/punkt_tab", "punkt_tab"),
    )

    def __init__(
        self,
        model_name: str = "pt_core_news_md",
        language: str = "portuguese",
    ):
        """Load the spaCy pipeline and make sure the Punkt data is present.

        Args:
            model_name: Name of the installed spaCy pipeline to load.
            language: Punkt language used by ``tokenize_sentences``.
        """
        self.nlp = spacy.load(model_name)
        self.language = language
        self._ensure_punkt()

    @staticmethod
    def _ensure_punkt() -> None:
        """Download the Punkt resources only when they are not installed."""
        for resource_path, package_id in NLPModel._PUNKT_RESOURCES:
            try:
                nltk.data.find(resource_path)
            except LookupError:
                # Missing locally: fetch once, without the progress chatter
                # that an unconditional download prints on every start-up.
                nltk.download(package_id, quiet=True)

    def __call__(self, text: str):
        """Makes the model callable like model(text)."""
        return self.extract_entities(text)  # or another default method

    def extract_entities(self, text: str) -> list:
        """Return the named entities spaCy finds in *text*.

        Args:
            text: Raw input text.

        Returns:
            A list of ``(entity_text_lowercased, entity_label)`` tuples, in
            the order the pipeline reports them in ``doc.ents``.
        """
        doc = self.nlp(text)
        return [(ent.text.lower(), ent.label_) for ent in doc.ents]

    def tokenize_sentences(self, text: str):
        """Split *text* into sentences with NLTK's Punkt tokenizer.

        The tokenizer is told the model's language explicitly; NLTK would
        otherwise fall back to its English model.
        """
        return nltk.sent_tokenize(text, language=self.language)