File size: 896 Bytes
a2682b3
 
 
 
 
 
 
97e0c69
 
 
 
a2682b3
 
e00c07d
 
 
 
 
 
 
 
 
 
a2682b3
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
import spacy
import nltk

class NLPModel:
    """Thin wrapper around a spaCy pipeline and NLTK sentence splitting.

    The instance holds the spaCy pipeline loaded from ``pt_core_news_md`` in
    ``self.nlp`` and exposes named-entity extraction plus sentence
    tokenization. Calling the instance directly is a shortcut for
    :meth:`extract_entities`.
    """

    def __init__(self):
        """Load the spaCy pipeline and make sure the Punkt data is present."""
        self.nlp = spacy.load("pt_core_news_md")
        # Only reach for the network when the tokenizer data is not already
        # installed; an unconditional download contacts the NLTK index on
        # every instantiation.
        try:
            nltk.data.find('tokenizers/punkt')
        except LookupError:
            nltk.download('punkt')

    def __call__(self, text: "str | list[str]") -> "list[tuple[str, str]]":
        """Make the model callable like ``model(text)``.

        Delegates to :meth:`extract_entities` and returns its result.
        """
        return self.extract_entities(text)

    def extract_entities(
        self, text: "str | list[str]"
    ) -> "list[tuple[str, str]]":
        """Return the named entities found in *text*.

        Args:
            text: Either a single string, or a list/tuple of strings (for
                example sentences) that are each run through the pipeline
                separately.

        Returns:
            A flat list of ``(entity_text_lowercased, entity_label)`` tuples,
            in the order the pipeline reports them. For a sequence input the
            per-item results are concatenated in input order.
        """
        # Normalise both accepted input shapes to "a sequence of strings" so
        # that there is a single extraction path.
        items = text if isinstance(text, (list, tuple)) else [text]

        entities = []
        for item in items:
            doc = self.nlp(item)
            entities.extend((ent.text.lower(), ent.label_) for ent in doc.ents)
        return entities

    def tokenize_sentences(self, text: str, language: str = "portuguese"):
        """Split *text* into sentences with NLTK's Punkt tokenizer.

        Args:
            text: The text to split.
            language: Name of the Punkt model to use. Defaults to
                ``"portuguese"`` so that sentence splitting matches the
                Portuguese spaCy pipeline loaded in ``__init__``; NLTK's own
                default would be the English model.

        Returns:
            Whatever ``nltk.sent_tokenize`` returns for the given arguments.
        """
        return nltk.sent_tokenize(text, language=language)