Update app.py
Browse files
app.py
CHANGED
@@ -3,8 +3,10 @@ from transformers import pipeline
|
|
3 |
from transformers import AutoModelForSeq2SeqLM
|
4 |
from transformers import AutoTokenizer
|
5 |
from transformers import GenerationConfig
|
|
|
|
|
6 |
|
7 |
-
model_name='google/flan-t5-
|
8 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
9 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
|
10 |
|
@@ -31,16 +33,28 @@ def make_prompt(sentence):
|
|
31 |
|
32 |
return prompt
|
33 |
|
34 |
-
def
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
demo = gr.Blocks()
|
46 |
|
|
|
3 |
from transformers import AutoModelForSeq2SeqLM
|
4 |
from transformers import AutoTokenizer
|
5 |
from transformers import GenerationConfig
|
6 |
+
from nltk.tokenize import sent_tokenize
|
7 |
+
import re
|
8 |
|
9 |
+
# Hugging Face hub checkpoint to load below.
model_name='google/flan-t5-small'
# Loaded at import time: blocking download on first run, local cache afterwards.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# use_fast=True selects the Rust-backed tokenizer implementation.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
|
12 |
|
|
|
33 |
|
34 |
return prompt
|
35 |
|
36 |
+
def split_conj(text):
    """Split *text* into clauses at the contrasting conjunctions "but"/"yet".

    Returns a list of the stripped clause strings, with empty clauses
    dropped (e.g. when the text starts with the conjunction).

    Fix over the original: the pattern now uses word boundaries, so words
    that merely contain "but"/"yet" (e.g. "butter") are no longer split.
    The original also returned a one-shot ``map`` iterator; a list is
    backward-compatible (still iterable) and can be re-used.
    """
    raw_parts = re.sub(r'\b(but|yet)\b', '|', text).split('|')
    return [part for part in (p.strip() for p in raw_parts) if part]
|
38 |
+
|
39 |
+
def get_sentiment_from_llm(review_text):
    """Score each clause of *review_text* with the seq2seq model.

    The review is split into sentences (NLTK ``sent_tokenize``), each
    sentence is further split on contrasting conjunctions via
    ``split_conj``, and every resulting clause is prompted through the
    module-level ``model``/``tokenizer``. Returns the newline-joined
    decoded outputs, one per clause.
    """
    # Flatten every sentence's clauses into one list.
    # BUG FIX: the original did segments.append(split_conj(sen)), which
    # stored the iterator object itself — make_prompt() then received a
    # map object instead of the clause text. extend() flattens correctly.
    segments = []
    for sentence in sent_tokenize(review_text):
        segments.extend(split_conj(sentence))

    ls_outputs = []
    for seg in segments:
        # Renamed from `input` to avoid shadowing the builtin.
        prompt = make_prompt(seg)
        inputs = tokenizer(prompt, return_tensors='pt')
        output = tokenizer.decode(
            model.generate(
                inputs["input_ids"],
                max_new_tokens=100,
            )[0],
            skip_special_tokens=True)
        # NOTE(review): splitting on 'PPrint ' reproduces the original
        # behavior; presumably that marker comes from make_prompt's
        # template — confirm against make_prompt (defined above).
        ls_outputs.append("\n".join(output.split('PPrint ')))

    return "\n".join(ls_outputs)
|
58 |
|
59 |
# Gradio UI container for the app; assumes `import gradio as gr` appears
# earlier in the file (not visible in this chunk) — TODO confirm.
demo = gr.Blocks()
|
60 |
|