Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,12 @@
|
|
1 |
-
|
2 |
import gradio as gr
|
3 |
from transformers import pipeline
|
4 |
from transformers import AutoModelForSeq2SeqLM
|
5 |
from transformers import AutoTokenizer
|
6 |
from transformers import GenerationConfig
|
7 |
import re
|
8 |
-
|
9 |
model_name='google/flan-t5-base'
|
10 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
11 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
|
12 |
-
|
13 |
Examples_to_teach_model="""
|
14 |
Text: I hate apples
|
15 |
Sentiment analysis:
|
@@ -24,43 +21,39 @@ Sentiment analysis:
|
|
24 |
Sentiments: Negative
|
25 |
PPrint Key words: tired, long process
|
26 |
"""
|
27 |
-
|
28 |
def make_prompt(sentence):
|
29 |
prompt = Examples_to_teach_model+ "Text: " + sentence + "Sentiment analysis:"
|
30 |
-
|
31 |
return prompt
|
|
|
|
|
32 |
|
33 |
def get_sentiment_from_llm(review_text):
|
34 |
-
|
35 |
segments=[]
|
36 |
-
|
37 |
-
ls_outputs=[]
|
38 |
-
|
39 |
-
for seg in segments:
|
40 |
if len(sen)>=3:
|
41 |
-
|
42 |
-
|
43 |
-
output = tokenizer.decode(
|
44 |
-
model.generate(
|
45 |
-
inputs["input_ids"],
|
46 |
-
max_new_tokens=100,
|
47 |
-
)[0],
|
48 |
-
skip_special_tokens=True)
|
49 |
-
ls_outputs.append("\n".join(output.split('PPrint ')))
|
50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
return "\n".join(ls_outputs)
|
52 |
-
|
53 |
demo = gr.Blocks()
|
54 |
-
|
55 |
sentiment_extr = gr.Interface(
|
56 |
fn=get_sentiment_from_llm,
|
57 |
inputs=gr.Textbox(label="Text input", type="text"),
|
58 |
outputs=gr.Textbox(label="Sentiments", type="text"),
|
59 |
title="Sentiments analysis",
|
60 |
-
description="Sentiment analysis and keywords extraction. Powered by prompt tuned flan-t5
|
61 |
)
|
62 |
-
|
63 |
with demo:
|
64 |
gr.TabbedInterface([sentiment_extr], ["Sentiment text analysis"])
|
65 |
-
|
66 |
-
demo.launch()
|
|
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
from transformers import AutoModelForSeq2SeqLM
|
4 |
from transformers import AutoTokenizer
|
5 |
from transformers import GenerationConfig
|
6 |
import re
|
|
|
7 |
# Base checkpoint: Google's instruction-tuned FLAN-T5 (seq2seq), driven
# below with a few-shot prompt for sentiment/keyword extraction.
model_name='google/flan-t5-base'
# Loaded at import time; first run downloads the weights, so module
# import may take a while.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# use_fast=True selects the Rust-backed tokenizer when available.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
|
|
|
10 |
Examples_to_teach_model="""
|
11 |
Text: I hate apples
|
12 |
Sentiment analysis:
|
|
|
21 |
Sentiments: Negative
|
22 |
PPrint Key words: tired, long process
|
23 |
"""
|
|
|
24 |
def make_prompt(sentence):
    """Build a few-shot prompt for *sentence*.

    Appends the new text to the worked examples in
    ``Examples_to_teach_model`` using the same ``Text: ...`` /
    ``Sentiment analysis:`` layout as those examples, so the model sees
    one consistent pattern.

    Parameters
    ----------
    sentence : str
        The clause to analyse.

    Returns
    -------
    str
        The complete prompt to feed the model.
    """
    # Newline before the cue matches the in-context examples; the
    # original concatenation ran the sentence and the cue together
    # ("...slowSentiment analysis:"), breaking the few-shot pattern.
    return Examples_to_teach_model + "Text: " + sentence + "\nSentiment analysis:"
|
27 |
+
def split_conj(text):
    """Split *text* into clauses at contrasting conjunctions.

    Each conjunction ("but", "yet", "although", ...) is replaced with a
    ``|`` sentinel and the string is split on it, so a sentence with
    mixed sentiment ("good food but slow service") is analysed clause
    by clause.

    Word boundaries (``\\b``) keep the pattern from matching inside
    longer words — the original pattern mangled e.g. "butter" and
    "stillness".

    Parameters
    ----------
    text : str
        A (lower-cased) sentence.

    Returns
    -------
    list[str]
        The clauses; a single-element list when no conjunction occurs.
    """
    pattern = r'\b(but|yet|although|however|nevertheless|on the other hand|still|though)\b'
    return re.sub(pattern, "|", text).split('|')
|
29 |
|
30 |
def get_sentiment_from_llm(review_text):
    """Run few-shot sentiment analysis on each clause of *review_text*.

    The text is lower-cased, split into sentences on ``.``, and each
    sentence is further split on contrasting conjunctions (via
    ``split_conj``) so that mixed reviews are scored clause by clause.
    Each clause is wrapped in the few-shot prompt and generated through
    the module-level FLAN-T5 ``model``/``tokenizer``.

    Parameters
    ----------
    review_text : str
        Free-form review text from the UI.

    Returns
    -------
    str
        The model's sentiment/keyword output for every clause, joined
        with newlines; empty string when no clause is long enough.
    """
    sentences = review_text.lower().split(".")
    segments = []
    for sentence in sentences:
        # Skip fragments shorter than 3 chars (e.g. the empty string
        # produced after a trailing period).
        if len(sentence) >= 3:
            # extend() instead of repeated list concatenation, which
            # rebuilt the whole list on every sentence.
            segments.extend(split_conj(sentence))

    ls_outputs = []
    for seg in segments:
        # Renamed from `input`, which shadowed the builtin.
        prompt = make_prompt(seg)
        inputs = tokenizer(prompt, return_tensors='pt')
        output = tokenizer.decode(
            model.generate(
                inputs["input_ids"],
                max_new_tokens=100,
            )[0],
            skip_special_tokens=True)
        # The few-shot examples use a 'PPrint ' marker before the
        # keyword line; splitting on it puts keywords on their own line.
        ls_outputs.append("\n".join(output.split('PPrint ')))
    return "\n".join(ls_outputs)
|
|
|
49 |
# Top-level Gradio wiring: a Blocks container holding a single
# tabbed Interface that maps free text -> sentiment/keyword text.
demo = gr.Blocks()

sentiment_extr = gr.Interface(
    fn=get_sentiment_from_llm,
    inputs=gr.Textbox(label="Text input", type="text"),
    outputs=gr.Textbox(label="Sentiments", type="text"),
    title="Sentiments analysis",
    # HTML <br> is allowed in Interface descriptions.
    description="Sentiment analysis and keywords extraction. Powered by prompt tuned flan-t5 from Google. <br> The model is run on small CPU. Please allow 2-3 minutes for longer inputs.",
)

with demo:
    gr.TabbedInterface([sentiment_extr], ["Sentiment text analysis"])
# Blocking call: starts the web server.
demo.launch()
|
|