thuyentruong committed on
Commit
f10267f
·
verified ·
1 Parent(s): 735581e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -11
app.py CHANGED
@@ -3,8 +3,10 @@ from transformers import pipeline
3
  from transformers import AutoModelForSeq2SeqLM
4
  from transformers import AutoTokenizer
5
  from transformers import GenerationConfig
 
 
6
 
7
# Checkpoint identifier passed to both from_pretrained() calls below.
model_name = 'google/flan-t5-base'

# Load the seq2seq model and its tokenizer for that checkpoint; use_fast=True
# asks for the fast tokenizer implementation.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
10
 
@@ -31,16 +33,28 @@ def make_prompt(sentence):
31
 
32
  return prompt
33
 
34
def get_sentiment_from_llm(sentence):
    """Generate the model's response for a prompt built from *sentence*.

    Args:
        sentence: Text handed to make_prompt() to build the model input.

    Returns:
        The decoded generation, with every 'PPrint ' marker replaced by a
        newline.
    """
    prompt = make_prompt(sentence)
    encoded = tokenizer(prompt, return_tensors='pt')
    generated = model.generate(encoded["input_ids"], max_new_tokens=100)
    # generate() returns a batch; only the first sequence is decoded.
    decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    pieces = decoded.split('PPrint ')
    return "\n".join(pieces)
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  demo = gr.Blocks()
46
 
 
3
  from transformers import AutoModelForSeq2SeqLM
4
  from transformers import AutoTokenizer
5
  from transformers import GenerationConfig
6
+ from nltk.tokenize import sent_tokenize
7
+ import re
8
 
9
# Checkpoint identifier passed to both from_pretrained() calls below.
model_name = 'google/flan-t5-small'

# Load the seq2seq model and its tokenizer for that checkpoint; use_fast=True
# asks for the fast tokenizer implementation.
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
12
 
 
33
 
34
  return prompt
35
 
36
# Conjunctions that separate contrasting clauses. The word boundaries keep
# words such as "button" or "butter" intact, and matching ignores case so a
# sentence-initial "But" is recognised as well.
_CONJUNCTION_RE = re.compile(r"\b(?:but|yet)\b", re.IGNORECASE)


def split_conj(text):
    """Split *text* into clauses around the conjunctions "but" and "yet".

    The text is split directly on the conjunctions, so a literal "|" in the
    input is left untouched.

    Args:
        text: A sentence (or any string) to split.

    Returns:
        A list of the non-empty clauses, each stripped of surrounding
        whitespace, in their original order.
    """
    clauses = (part.strip() for part in _CONJUNCTION_RE.split(text))
    # Drop empty clauses (e.g. when the text starts or ends with a conjunction).
    return [clause for clause in clauses if clause]
38
+
39
def get_sentiment_from_llm(review_text):
    """Run the model on each clause of *review_text* and join the outputs.

    The review is split into sentences with sent_tokenize(), each sentence is
    split into clauses with split_conj(), and one prompt per clause is sent
    to the model.

    Args:
        review_text: The full review text to analyse.

    Returns:
        The decoded generations for all clauses joined by newlines, with
        every 'PPrint ' marker inside a generation replaced by a newline.
        An empty string when the review yields no clauses.
    """
    segments = []
    for sentence in sent_tokenize(review_text):
        # split_conj() returns an iterable of clauses, so flatten it into the
        # list; appending it whole would hand make_prompt() an iterable
        # instead of a string. Empty clauses are skipped.
        segments.extend(clause for clause in split_conj(sentence) if clause)

    ls_outputs = []
    for segment in segments:
        prompt = make_prompt(segment)
        encoded = tokenizer(prompt, return_tensors='pt')
        generated = model.generate(encoded["input_ids"], max_new_tokens=100)
        # generate() returns a batch; only the first sequence is decoded.
        decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
        ls_outputs.append("\n".join(decoded.split('PPrint ')))

    return "\n".join(ls_outputs)
58
 
59
  demo = gr.Blocks()
60