cd14 laumiulun commited on
Commit
fbe47e5
·
0 Parent(s):

Duplicate from loxzdigital/Model_SA_Space

Browse files

Co-authored-by: Andy Lau <laumiulun@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.npy filter=lfs diff=lfs merge=lfs -text
13
+ *.npz filter=lfs diff=lfs merge=lfs -text
14
+ *.onnx filter=lfs diff=lfs merge=lfs -text
15
+ *.ot filter=lfs diff=lfs merge=lfs -text
16
+ *.parquet filter=lfs diff=lfs merge=lfs -text
17
+ *.pickle filter=lfs diff=lfs merge=lfs -text
18
+ *.pkl filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pt filter=lfs diff=lfs merge=lfs -text
21
+ *.pth filter=lfs diff=lfs merge=lfs -text
22
+ *.rar filter=lfs diff=lfs merge=lfs -text
23
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
24
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
25
+ *.tflite filter=lfs diff=lfs merge=lfs -text
26
+ *.tgz filter=lfs diff=lfs merge=lfs -text
27
+ *.wasm filter=lfs diff=lfs merge=lfs -text
28
+ *.xz filter=lfs diff=lfs merge=lfs -text
29
+ *.zip filter=lfs diff=lfs merge=lfs -text
30
+ *.zst filter=lfs diff=lfs merge=lfs -text
31
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
32
+ *.sav filter=lfs diff=lfs merge=lfs -text
33
+ models/* filter=lfs diff=lfs merge=lfs -text
34
+ Tone_and_target.csv filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ __pycache__/
2
+ /.DS_Store
3
+ /.streamlit/
CI_RATES.csv ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ ,model,2_5,5,10,25,75,90,95,97_5
2
+ 0,Click_To_Open_Rate,-0.04112196411968941,-0.03438777926058009,-0.027337482186956127,-0.014792473005118267,0.014543842510280375,0.028110496211978253,0.035499639337518096,0.042420791954972455
3
+ 1,Conversion_Rate,-0.01937036282646773,-0.016724064382768754,-0.01320846866885363,-0.006978657696956481,0.0067317653782765115,0.012849716838728757,0.016753552736137472,0.020409019120631026
4
+ 2,Revenue_Per_Email,-3.885196454402065,-3.1344678960009262,-2.500005024239302,-1.344952303324935,1.3157780163896504,2.602544034460352,3.2714811650866737,3.9553102784548777
FunctionsModelSA_V1.py ADDED
@@ -0,0 +1,444 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import s3fs
2
+ import pandas as pd
3
+ import numpy as np
4
+ from numpy import arange
5
+ from colour import Color
6
+ import plotly.graph_objects as go
7
+ from nltk import tokenize
8
+ from IPython.display import Markdown
9
+ from PIL import ImageColor
10
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
11
+ import nltk
12
+ nltk.download('punkt')
13
+ import email
14
+ import codecs
15
+ import pickle
16
+ import string
17
+ from scipy import spatial
18
+ import re
19
+ import pytorch_lightning as pl
20
+ from bs4 import BeautifulSoup
21
+ import ipywidgets as widgets
22
+ from ipywidgets import FileUpload
23
+ from urlextract import URLExtract
24
+ from transformers import BertTokenizerFast as BertTokenizer, BertModel, BertConfig
25
+ import torch.nn as nn
26
+ import torch
27
+ from ipywidgets import interact, Dropdown
28
+ import boto3
29
+ from sagemaker import get_execution_role
30
+ from scipy import spatial
31
+ from ipyfilechooser import FileChooser
32
+ import random
33
+
34
+
35
# Tone classes, in the column order used for LABEL_COLUMNS.
_TONE_LABELS = [
    'label_analytical', 'label_casual', 'label_confident', 'label_friendly',
    'label_joyful', 'label_optimistic', 'label_respectful', 'label_urgent',
]

# Category names behind the one-hot 'industry_*' feature columns.
_INDUSTRIES = [
    'Academic and Education', 'Energy', 'Entertainment',
    'Finance and Banking', 'Healthcare', 'Hospitality', 'Real Estate',
    'Retail', 'Software and Technology',
]

# Category names behind the one-hot 'campaign_type_*' feature columns.
_CAMPAIGN_TYPES = [
    'Abandoned_Cart', 'Engagement', 'Newsletter', 'Product_Announcement',
    'Promotional', 'Review_Request', 'Survey', 'Transactional',
    'Usage_and_Consumption', 'Webinar',
]

# Shared configuration for the tone tagger and the rate models.
PARAMS = {
    'BATCH_SIZE': 8,
    'MAX_TOKEN_COUNT': 100,
    'BERT_MODEL_NAME': 'google/bert_uncased_L-2_H-128_A-2',
    'N_EPOCHS': 10,
    'n_classes': 8,
    'LABEL_COLUMNS': list(_TONE_LABELS),
    'TEXTCOL': 'text',
    # Feature columns, in order: tones, then industries, then campaign types.
    'rf_labels': (
        list(_TONE_LABELS)
        + ['industry_' + name for name in _INDUSTRIES]
        + ['campaign_type_' + name for name in _CAMPAIGN_TYPES]
    ),
}
58
+
59
# Per-target offsets keyed by the 'model' column; the percentile columns
# (e.g. '2_5', '97_5') are added to a point prediction in prediction().
CI_rates=pd.read_csv('CI_RATES.csv')
60
+
61
+ ### create file uploading widget
62
def email_upload():
    """Show an upload widget restricted to ``.eml`` files and return it.

    Returns:
        The ``FileUpload`` widget, configured to accept multiple files.
    """
    # `display` is only injected as a global inside IPython sessions; import
    # it explicitly so calling this from a plain interpreter does not raise
    # NameError.
    from IPython.display import display

    print("Please upload your email (In EML Format)")
    upload = FileUpload(accept='.eml', multiple=True)
    display(upload)
    return upload
67
+
68
def parse_email(uploaded_file):
    """Return the text inside the ``<body>`` tags of the first uploaded email.

    Args:
        uploaded_file: upload widget whose ``value`` maps a filename to a dict
            holding the raw file bytes under ``'content'``.

    Returns:
        The concatenated text of every ``<body>`` element found in the
        message parts; an empty string when there is none.

    Raises:
        IndexError: if nothing has been uploaded yet.
    """
    filename = list(uploaded_file.value.keys())[0]
    raw_bytes = bytes(uploaded_file.value[filename]['content'])

    # Parse from bytes so 8-bit payloads survive and the transfer encoding
    # (quoted-printable / base64) can be undone per part below.
    message = email.message_from_bytes(raw_bytes)

    body_texts = []
    for part in message.walk():
        # Multipart containers only hold other parts; they carry no text.
        if part.is_multipart():
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        charset = part.get_content_charset() or 'utf-8'
        try:
            markup = payload.decode(charset, errors='replace')
        except LookupError:
            # Unknown charset label in the message: fall back to UTF-8.
            markup = payload.decode('utf-8', errors='replace')
        # Pin the parser so results do not depend on which optional parser
        # libraries are installed.
        soup = BeautifulSoup(markup, 'html.parser')
        body_texts.extend(tag.text for tag in soup.find_all('body'))
    return "".join(body_texts)
82
+
83
+
84
def text_clean(x, punct=True):
    """Normalise raw text before it is tokenised.

    x- the text to clean
    punct- when True, every punctuation character is replaced by a space

    Returns the lower-cased, ASCII-only text with links, apostrophe
    suffixes, digit-bearing tokens, hashtags, '=' signs and isolated single
    characters replaced by spaces, and tabs/newlines removed.
    """
    # Lowercase, then drop anything that cannot be represented in ASCII.
    x = x.lower().encode('ascii', 'ignore').decode()

    ### Light
    light_rules = (
        r'https*\S+',       # links
        r'http*\S+',
        r'\'\w+',           # apostrophe suffixes
        r'\w*\d+\w*',       # tokens containing digits
        r'\s{2,}',          # runs of whitespace
        r'\s[^\w\s]\s',     # a lone symbol between spaces
    )
    for pattern in light_rules:
        x = re.sub(pattern, ' ', x)

    ### Heavy
    for pattern in (r'@\S', r'#\S+'):
        x = re.sub(pattern, ' ', x)
    x = x.replace('=', ' ')
    # Equality (not truthiness) is kept on purpose to match existing callers.
    if punct == True:
        x = re.sub('[%s]' % re.escape(string.punctuation), ' ', x)
    # remove single letters and numbers surrounded by space
    x = re.sub(r'\s[a-z]\s|\s[0-9]\s', ' ', x)

    for leftover in (' Â\x8a', '\t', '\n', 'Ã\x83', 'Â\x92', 'Â\x93', 'Â\x8a', 'Â\x95'):
        x = x.replace(leftover, '')

    return x
110
+
111
+ ####BERT MODEL LOAD REQUIREMENTS#########
112
+
113
+
114
class ToneTagger(pl.LightningModule):
    """BERT encoder followed by one linear layer with sigmoid outputs."""

    def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
        """Create the encoder and the classification head.

        n_classes- number of outputs of the linear head
        n_training_steps / n_warmup_steps- stored on the instance unchanged
        """
        super().__init__()
        self.n_training_steps = n_training_steps
        self.n_warmup_steps = n_warmup_steps
        self.bert = BertModel.from_pretrained(PARAMS['BERT_MODEL_NAME'], return_dict=True)
        hidden_size = self.bert.config.hidden_size
        self.classifier = nn.Linear(hidden_size, n_classes)
        self.criterion = nn.BCELoss()

    def forward(self, input_ids, attention_mask):
        """Return sigmoid scores computed from the encoder's pooled output."""
        encoded = self.bert(input_ids, attention_mask=attention_mask)
        logits = self.classifier(encoded.pooler_output)
        return torch.sigmoid(logits)
129
+
130
+
131
+ # LOAD IN PRE TRAINED MODEL WITH WEIGHTS
132
# Build the architecture with the configured number of tone classes.
model = ToneTagger(PARAMS['n_classes'])
# map_location lets the checkpoint load on CPU-only hosts regardless of the
# device it was saved from; load_state_dict then copies the values into the
# freshly built module.
model.load_state_dict(torch.load("models/SAMODEL", map_location=torch.device("cpu")))
# Switch to inference mode.
model.eval()
135
+
136
+
137
+
138
def bert_tones(text_sentences, model):
    """Clean each sentence and predict its tone scores with the given model.

    text_sentences- iterable of raw sentences
    model- callable taking (input_ids, attention_mask) and returning a tensor
        whose first row holds the scores for the sentence

    Returns (text, predictions): the sentences cleaned with punctuation kept,
    and one numpy array of scores per sentence, in the same order.
    """
    # Read the model name and length limit from PARAMS so the tokenizer
    # always matches the configuration the tagger itself is built from.
    tokenizer = BertTokenizer.from_pretrained(PARAMS['BERT_MODEL_NAME'])

    text = []
    predictions = []
    for sent in text_sentences:
        # Returned text keeps punctuation; the model input has it stripped.
        text.append(text_clean(sent, False))
        encoding = tokenizer.encode_plus(
            text_clean(sent),
            add_special_tokens=True,
            max_length=PARAMS['MAX_TOKEN_COUNT'],
            return_token_type_ids=False,
            padding="max_length",
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        with torch.no_grad():
            pred = model(encoding['input_ids'], encoding['attention_mask'])
        predictions.append(np.array(pred.cpu().numpy()[0]))

    return text, predictions
165
+
166
+
167
def convert_text_to_tone(text,model=model,params=PARAMS):
    """Score every sentence of *text* and average the scores per tone.

    text- raw text, punctuation included (used for sentence splitting)
    model- tagger passed through to bert_tones
    params- configuration dict; only 'LABEL_COLUMNS' is read here

    Returns (final, tones): a one-row frame with columns 'text',
    'sentiment' and 'sentencetone', and a frame of the mean score per tone.
    """
    # VADER polarity of the fully cleaned text; stored in the result only.
    vader_scores = SentimentIntensityAnalyzer().polarity_scores(text_clean(text))

    # Split into sentences with NLTK and score each one.
    sentences = tokenize.sent_tokenize(text)
    sentence_text, sentence_tones = bert_tones(sentences, model)

    final = pd.DataFrame(
        [[sentence_text, vader_scores, sentence_tones]],
        columns=['text', 'sentiment', 'sentencetone'],
    )
    # Average across sentences (axis 0 of the stacked per-sentence scores).
    mean_tones = final['sentencetone'].apply(np.mean, axis=0)
    tones = pd.DataFrame(mean_tones.tolist(), columns=params['LABEL_COLUMNS'])
    return final, tones
187
+
188
+
189
+ ### This will be abstracted away to a more dynamic model
190
def _load_pickled_model(path):
    """Unpickle one saved model from *path* and close the file afterwards."""
    # NOTE(review): pickle runs arbitrary code on load; only use this on
    # model files shipped with the application.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


brf='Rate_Models/bounce_rate_model.sav'
BRM = _load_pickled_model(brf)
orf='Rate_Models/open_rate_model.sav'
ORM = _load_pickled_model(orf)
urf='Rate_Models/unsubscribe_rate_model.sav'
URM = _load_pickled_model(urf)
crf='Rate_Models/click_trough_rate_model.sav'
CRM = _load_pickled_model(crf)
CV='Rate_Models/Conversion_rate.sav'
ConM = _load_pickled_model(CV)
CTOR='Rate_Models/Click-To-Open_Rates.sav'
CTORM = _load_pickled_model(CTOR)
RV='Rate_Models/Revenue_per_email.sav'
RVM = _load_pickled_model(RV)

# Target name -> loaded model, looked up by prediction().
model_dict={'Open_Rate':ORM,
            'Click_Through_Rate': CRM,
            'Unsubscribe_Rate': URM,
            'Bounce_Rate':BRM,
            'Click_To_Open_Rate': CTORM,
            'Conversion_Rate': ConM,
            'Revenue_Per_Email':RVM}
212
+
213
+
214
+ ## Plot confidence interval
215
def plot_CI(pred,lower,upper,scale_factor=0.5):
    """Plot a prediction as a marker on a line with its interval shaded.

    pred- the predicted value for the target variable
    lower- the lower bound of the interval
    upper- the upper bound of the interval
    scale_factor- extra x-axis room to the right, as a fraction of upper

    The figure is shown with fig.show(); nothing is returned.
    """
    title=f'The Predicted Value is {pred}'
    fig = go.Figure()
    fig.update_xaxes(showgrid=False)
    # Only the zero line of the y axis is drawn, acting as a number line.
    fig.update_yaxes(showgrid=False,
                     zeroline=True, zerolinecolor='black', zerolinewidth=3,
                     showticklabels=False)
    fig.update_layout(height=200, plot_bgcolor='white')
    # One marker at (pred, 0); x and y must have the same length.
    fig.add_trace(go.Scatter(
        x=[pred], y=[0], mode='markers', marker_size=10,line=dict(color="red")
    ))
    fig.update_layout(xaxis_range=[0,upper+upper*scale_factor])
    fig.update_layout(showlegend=False)
    fig.add_vline(x=lower,annotation_text=f"{lower}",annotation_position="top")
    fig.add_vline(x=upper,annotation_text=f"{upper}",annotation_position="top")
    fig.add_vrect(lower,upper,fillcolor='red',opacity=0.25,annotation_text='95% CI',annotation_position="outside top")
    fig.update_layout(title_text=title, title_x=0.5)
    fig.show()
240
+
241
def find_max_cat(df, target, industry, campaign):
    """Find the best-performing row for an industry/campaign combination.

    df- frame with one-hot industry and campaign columns plus the target
    target- name of the metric column to maximise
    industry / campaign- names of the one-hot columns that must equal 1

    Returns (min, max, rec): the rounded minimum and maximum of *target*
    among matching rows, and positions 3 through 10 of the row holding the
    maximum. Returns (0, 0, 0) when no row matches.
    """
    matches = df[(df[campaign] == 1) & (df[industry] == 1)]
    if len(matches) == 0:
        return 0, 0, 0

    target_values = matches[target]
    best_row = df.loc[target_values.idxmax()]
    # Positions 3..10 of the row are taken as the tone values.
    rec = best_row.iloc[3:11]
    return round(target_values.min(), 3), round(target_values.max(), 3), rec
249
+
250
+
251
def scale_values(val, tn):
    """Return the tone value scaled by 100.

    val- slider value; accepted but not used in the result
    tn- current tone value
    """
    return tn*100


# Display names for the tones, used as bar labels in the plots.
tone_labels = [
    'Analytical',
    'Casual',
    'Confident',
    'Friendly',
    'Joyful',
    'Optimistic',
    'Respectful',
    'Urgent',
]
256
+
257
+ # ## Plot recommendations - ORIGINAL FROM V1.0
258
+ # def recommend(tones,recommend_changes,change,target):
259
+ # ''' This function creates the recomended changes plots it takes it the tones, the changes and '''
260
+ # fig = go.Figure()
261
+ # fig.add_trace(go.Bar(
262
+ # y=tones.columns,
263
+ # x=tones.values[0],
264
+ # name='Current Tones',
265
+ # orientation='h',
266
+ # # text=np.round(tones.values[0],3),
267
+ # width=.9,
268
+ # marker=dict(
269
+ # color='#00e6b1',
270
+ # line=dict(color='rgba(58, 71, 80, 1.0)', width=3)
271
+ # )
272
+
273
+ # ))
274
+ # fig.add_trace(go.Bar(
275
+ # y=tones.columns,
276
+ # x=recommend_changes,
277
+ # name='Recommend changes',
278
+ # orientation='h',
279
+ # text=np.round(recommend_changes,3),
280
+ # width=.5,
281
+ # marker=dict(
282
+ # color='#e60f00',
283
+ # line=dict(color='rgba(58, 71, 80, 1.0)', width=3)
284
+ # )
285
+ # ))
286
+ # fig.update_traces(textfont_size=18, textposition="outside", cliponaxis=False)
287
+ # fig.update_layout(height=1000, plot_bgcolor='white')
288
+ # fig.update_layout(barmode='stack', yaxis={'categoryorder':'array','categoryarray': recommend_changes.sort_values(key=abs,ascending=True).index})
289
+ # fig.update_layout(title_text=f'The following Changes will yield a {round(change,3)} increase in {target}')
290
+ # fig.show()
291
+
292
+ ## Plot recommendations - MODIFIED
293
def recommend(tones,recommend_changes,change,target):
    """Plot the recommended tone values as a horizontal bar chart.

    tones- accepted for compatibility with callers; not used in the plot
    recommend_changes- one value per entry of tone_labels, in that order
    change- expected change in the target; a fraction unless the target is
        'Revenue_Per_Email', in which case it is shown as a dollar amount
    target- name of the metric, shown in the title

    The figure is shown with fig.show(); nothing is returned.
    """
    fig = go.Figure()
    fig.add_trace(go.Bar(
        y=tone_labels,
        x=recommend_changes,
        name='Recommend changes',
        orientation='h',
        text=np.round(recommend_changes,3),
        width=.5,
        marker=dict(
            color='#e60f00',
            line=dict(color='rgba(58, 71, 80, 1.0)', width=1)
        )
    ))
    fig.update_traces(textfont_size=18, textposition="outside", cliponaxis=False)

    if target == 'Revenue_Per_Email':
        out = f"${round(change,2)}"
    else:
        # Format with one decimal so float noise from the multiplication
        # (e.g. 7.000000000000001) never reaches the title.
        out = f"{round(change,2)*100:.1f}%"
    fig.update_layout(title_text=f'The following Changes will yield a {out} increase in {target}')
    fig.show()
320
+
321
+
322
def prediction(tones,campaign_val,industry_val,target):
    """Predict the target metric and offset it by the stored interval bounds.

    tones- frame of tone scores; its columns are aligned to
        PARAMS['rf_labels'] and missing columns are filled with 0
    campaign_val / industry_val- one-hot column names to switch on
    target- key into model_dict and into the 'model' column of CI_rates

    Returns (pred, lower, higher, model) with lower and higher rounded to
    three decimals.
    """
    features = pd.DataFrame(tones, columns=PARAMS['rf_labels']).fillna(0)
    for flag_column in (campaign_val, industry_val):
        features.loc[0, flag_column] = 1

    model = model_dict[target]
    pred = model.predict(features)[0]

    # The '2_5' and '97_5' columns hold offsets added to the prediction.
    ci_row = CI_rates[CI_rates['model'] == target]
    lower = pred + ci_row['2_5'].values[0]
    higher = pred + ci_row['97_5'].values[0]
    return pred, round(lower, 3), round(higher, 3), model
336
+
337
+
338
+ ## Plot recommendations for intensity changes
339
def intensity_changes(tones,recommend_changes,change,target):
    """Plot tone-intensity changes and title the chart with their effect.

    tones- accepted for compatibility with callers; not used in the plot
    recommend_changes- one value per entry of tone_labels, in that order
    change- resulting change in the target; negative values are reported as
        a decrease, everything else as an increase
    target- name of the metric; 'Revenue_Per_Email' is shown in dollars,
        any other target as a percentage

    The figure is shown with fig.show(); nothing is returned.
    """
    fig = go.Figure()
    fig.add_trace(go.Bar(
        y=tone_labels,
        x=recommend_changes,
        name='Recommend changes',
        orientation='h',
        text=np.round(recommend_changes,3),
        width=.5,
        marker=dict(
            color='#00e6b1',
            line=dict(color='rgba(58, 71, 80, 1.0)', width=1)
        )
    ))
    fig.update_traces(textfont_size=18, textposition="outside", cliponaxis=False)

    direction = 'decrease' if change < 0 else 'increase'
    magnitude = change*(-1) if change < 0 else change
    if target == 'Revenue_Per_Email':
        out = f"${round(magnitude,2)}"
    else:
        # Both directions are scaled to percent; one decimal keeps float
        # noise from the multiplication out of the title.
        out = f"{round(magnitude,2)*100:.1f}%"
    fig.update_layout(title_text=f'The following Changes will {direction} the {target} by {out}')

    fig.show()
375
+
376
+
377
def load_data():
    """Load 'Tone_and_target.csv' and return it ready for lookups.

    Duplicate rows are dropped, 'industry' and 'campaign_type' are one-hot
    encoded, the 'Unnamed: 0' and 'body' columns are removed, and three
    metric columns are renamed to their underscore form.
    """
    renamed_metrics = {
        'Click-To-Open Rates': 'Click_To_Open_Rate',
        'Conversion Rate': 'Conversion_Rate',
        'Revenue Per email': 'Revenue_Per_Email',
    }
    unique_rows = pd.read_csv('Tone_and_target.csv').drop_duplicates()
    encoded = pd.get_dummies(unique_rows, columns=['industry', 'campaign_type'])
    trimmed = encoded.drop(columns=['Unnamed: 0', 'body'])
    return trimmed.rename(columns=renamed_metrics)
385
+
386
+
387
def plot_table(sorted_setence_tuple):
    """Build the sentence table figure.

    sorted_setence_tuple- list of (sentence, score, colour) tuples, where
        colour is a tuple that is rendered as 'rgb' + str(colour)

    Returns the plotly Figure. It is not shown here, so the caller decides
    how to render it.
    """
    # Transpose once; an empty list yields three empty columns instead of
    # raising IndexError.
    if sorted_setence_tuple:
        sentences, scores, colors = zip(*sorted_setence_tuple)
    else:
        sentences, scores, colors = (), (), ()
    rbg_list = ['rgb' + str(color) for color in colors]

    fig = go.Figure(data=[go.Table(
        header=dict(values=['<b>Sentences</b>', '<b>Difference from Recommended Tone</b>'],
                    line_color = 'darkslategray',
                    fill_color = '#010405',
                    align = 'center',
                    font=dict(family="Metropolis",color='white', size=16)),
        cells=dict(values=[sentences, # 1st column
                           scores] ,  # 2nd column
                   line_color='darkslategray',
                   fill_color=[rbg_list],
                   align=['left','center'],
                   font=dict(family="Arial",size=12)))
    ])
    return fig
411
+
412
def corrections(best,df):
    """Rank sentences by how far their tone is from the best tone.

    best- tone values of the best email for the current categories
    df- frame whose first row holds the sentence list under 'text' and the
        matching per-sentence tone arrays under 'sentencetone'

    Each sentence gets its cosine distance to *best* scaled to 0-100 and a
    colour from a ten-step palette. Sentences are sorted by score, largest
    first, and passed to plot_table, whose result is returned.
    """
    colors=['#48f0c9','#6ef5d6','#94f7e1','#bbfaec','#e6fff9','#ffe7e6','#ffc3bf','#ffa099','#ff7c73','#ff584d'] #loxz green primary to Loxz light red
    last_bucket = len(colors) - 1

    sentence_order=[]
    for text, cur in zip(df['text'][0], df['sentencetone'][0]):
        distance = spatial.distance.cosine(best,cur) # Cosine distance
        new_value = round(distance * 100)
        # round(distance * 10) reaches 10 once distance >= 0.95, one past
        # the end of the palette; clamp to the valid index range.
        color_value = min(max(round(distance * 10), 0), last_bucket)
        rbg=ImageColor.getcolor(f'{colors[color_value]}', "RGB")
        sentence_order.append((text,new_value,rbg))

    sorted_sentences=sorted(sentence_order,key=lambda x: x[1],reverse=True)
    return plot_table(sorted_sentences)
430
+
431
def read_file(fc):
    """Return the text inside the ``<body>`` tags of the selected email file.

    Args:
        fc: file chooser whose ``selected`` attribute is the path to read.

    Returns:
        The concatenated text of every ``<body>`` element found in the
        message parts; an empty string when there is none.
    """
    # Read as bytes so parsing does not depend on the locale's default text
    # encoding and the transfer encoding can be undone per part below.
    with open(fc.selected, 'rb') as handle:
        message = email.message_from_binary_file(handle)

    body_texts = []
    for part in message.walk():
        # Multipart containers only hold other parts; they carry no text.
        if part.is_multipart():
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue
        charset = part.get_content_charset() or 'utf-8'
        try:
            markup = payload.decode(charset, errors='replace')
        except LookupError:
            # Unknown charset label in the message: fall back to UTF-8.
            markup = payload.decode('utf-8', errors='replace')
        # Pin the parser so results do not depend on which optional parser
        # libraries are installed.
        soup = BeautifulSoup(markup, 'html.parser')
        body_texts.extend(tag.text for tag in soup.find_all('body'))
    return "".join(body_texts)
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Model SA Space
3
+ emoji: 🚀
4
+ colorFrom: yellow
5
+ colorTo: indigo
6
+ sdk: streamlit
7
+ sdk_version: 1.10.0
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: loxzdigital/Model_SA_Space
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
Rate_Models/Click-To-Open_Rates.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9cea028c663cce78104e961e1aa7f6481981d4574d81edcf63a4da416a41f65
3
+ size 3050503
Rate_Models/Conversion_rate.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a12e25f7bf2b980f0ff449a692c8cc116ed33ab400ec7a3ede30bcc38cb7db99
3
+ size 1235487
Rate_Models/Revenue_per_email.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a932241992fd2655b373567cc28b74a861e97cf4e0555c5c8f303d861ca3639
3
+ size 625803
Rate_Models/bounce_rate_model.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9edd3b1f3c0c4a199d370be5ebbac39729c14ba1c64baf7ed9e4b9b8b682bcab
3
+ size 1490487
Rate_Models/click_trough_rate_model.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2bc77675873651fd9cdf2ce585b3673faaffe08ae8f07e06db05989cff253879
3
+ size 3816121
Rate_Models/open_rate_model.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2b1dac902f3081d83f2ce047d725d8a35c59d388993fd7aae3e66437bc25bc4
3
+ size 1974887
Rate_Models/unsubscribe_rate_model.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:519c4094f5740288387e5f26097ea7c1d07f244801402268b9bc19962daae1c0
3
+ size 1954791
Tone_and_target.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c329e909d4a78e975696ca39093b627da865b1efed061832222705eefd7c89e3
3
+ size 32018412
app.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ast import arg
2
+ import FunctionsModelSA_V1
3
+ import numpy as np
4
+ import streamlit as st
5
+ import pandas as pd
6
+ import PIL
7
+ import time
8
+
9
+ import main_app
10
+ import utils
11
+ from colour import Color
12
+ import plotly.graph_objects as go
13
+ from nltk import tokenize
14
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
15
+ import nltk
16
+ nltk.download('punkt')
17
+ import codecs
18
+ import pickle
19
+ import string
20
+ from scipy import spatial
21
+ import pytorch_lightning as pl
22
+ from urlextract import URLExtract
23
+ from transformers import BertTokenizerFast as BertTokenizer, BertModel, BertConfig
24
+ import streamlit_analytics
25
+
26
+ from FunctionsModelSA_V1 import *
27
+ #from model_V1 import *
28
+
29
def table_data():
    """Return the model-information table as a two-column DataFrame.

    The columns are 'Field' and 'Data', with one row per fact.
    """
    rows = [
        ('Data Scientist', 'Jeffrey Ott'),
        ('Dataset', 'Internal + Campaign monitor'),
        ('Algorithm', 'BERT_Uncased_L_2_H_128_A-2, Single Linear Layer Neural Network, Random Forest'),
        ('Framework', 'Pytorch'),
        ('Ensemble', 'Bootstrapping'),
        ('Domain', 'NLP Text Classification'),
        ('Model Size', '16.8 MB'),
    ]
    return pd.DataFrame(rows, columns=['Field', 'Data'])
59
+
60
+
61
+
62
def add_bg_from_url():
    """Inject CSS that gives the app a fixed gradient background.

    Despite the name, no URL is involved: the background is a CSS
    linear-gradient.
    """
    gradient_css = """
<style>
.stApp {
background-image: linear-gradient(#45eff5,#1C8D99);
background-attachment: fixed;
background-size: cover
}
</style>
"""
    st.markdown(gradient_css, unsafe_allow_html=True)
76
+
77
+ add_bg_from_url()
78
+ #linear-gradient(#0A3144,#126072,#1C8D99);
79
+
80
+
81
+ #add whitespace to fix scroll
82
def WHITE_SPACE(lines):
    """Write *lines* non-breaking-space entries to pad the layout."""
    remaining = lines
    while remaining > 0:
        st.write('&nbsp;')
        remaining -= 1
85
+
86
+
87
# Sidebar: model description, model facts table, external links and footer.
with st.sidebar:

    with st.expander('Model Description', expanded=False):
        img = PIL.Image.open("figures/ModelSA.png")
        st.image(img)
        st.markdown('The model seeks to solve the problem of how to set the tone for an email campaign appropriately. This 5th generation model from the Loxz family uses state-of-the-art NLP to determine and predict the optimized sentiment of an email using tokenization techniques. The model will analyze any email text “shape” and help the user understand the tone and how that tone correlates with the metric of interest. We applied a pre-trained tiny BERT model to vectorize the email campaign text body, then a softmax dense layer was added to get the multi-label classifications. Email metrics are provided prior to campaign launch, and the model determines the optimal engagement rate based on several factors, including inputs by the campaign engineer.')

    with st.expander('Model Information', expanded=False):
        # CSS meant to hide the table's header and row-index cells.
        hide_table_row_index = """
<style>
thead tr th:first-child {display:none}
tbody th {display:none}
</style>
"""
        st.markdown(hide_table_row_index, unsafe_allow_html=True)
        st.table(table_data())

    utils.url_button('Model Homepage','https://loxz.com/#/models/SA')
    # url_button('Full Report','https://resources.loxz.com/reports/realtime-ml-character-count-model')
    utils.url_button('Amazon Market Place','https://aws.amazon.com/marketplace')

    # Spacer lines before the footer caption.
    WHITE_SPACE(15)
    st.caption("All models owned by Loxz Digital")
110
+
111
+
112
# Options offered by the parameter select boxes.
industry_lists = [
    'Software and Technology',
    'Academic and Education',
    'Entertainment',
    'Finance and Banking',
    'Hospitality',
    'Real Estate',
    'Retail',
    'Energy',
    'Healthcare',
]

campaign_types = [
    'Webinar',
    'Engagement',
    'Product_Announcement',
    'Promotional',
    'Newsletter',
    'Abandoned_Cart',
    'Review_Request',
    'Survey',
    'Transactional',
    'Usage_and_Consumption',
]

target_variables = [
    'Conversion_Rate',
    'Click_To_Open_Rate',
    'Revenue_Per_Email',
]

region_variables = [
    'Africa',
    'America',
    'Asia',
    'Europe',
    'Oceania',
]
123
+
124
+
125
# Page heading followed by four equal-width caption columns.
st.markdown("#### Sentiment Analysis: Email Industry v1.2")

stats_col1, stats_col2, stats_col3, stats_col4 = st.columns([1,1,1,1])

for _stats_column, _stats_caption in (
    (stats_col1, "Verified: Production"),
    (stats_col2, "Accuracy: 85%"),
    (stats_col3, "Speed: 3.86 ms"),
    (stats_col4, "Industry: Email"),
):
    with _stats_column:
        st.caption(_stats_caption)
142
+
143
# Email text that create_charts() analyses.
input_text = st.text_area("Please enter your email text here: ", height=50)

with st.expander('Please select your parameters: '):
    with streamlit_analytics.track('test123'):
        # index=6 preselects 'Retail' in industry_lists.
        industry = st.selectbox(
            'Please select your industry',
            industry_lists,
            index=6
        )

        # index=1 preselects 'Click_To_Open_Rate' in target_variables.
        target = st.selectbox(
            'Please select your target variable',
            target_variables,
            index=1
        )

        # index=7 preselects 'Survey' in campaign_types.
        campaign = st.selectbox(
            'Please select your campaign type',
            campaign_types,
            index=7
        )

        # index=1 preselects 'America'; create_charts() does not read region.
        region = st.selectbox(
            'Select your target region to generate the email with a more appropriate tone:',
            region_variables,
            index=1
        )
170
+
171
+
172
def _tone_slider(label):
    """Draw one tone slider starting at 0, echo its value, and return it."""
    level = st.slider(label, value = 0)
    st.write(label, level)
    return level


# One slider per tone; the values feed the 'Tone Levels' bar chart.
with st.expander('Adjust your tone intensities for your preference: '):
    #'Joyful', 'Confident', 'Urgent', 'Friendly', 'Optimistic', 'Analytical', 'Casual'
    joyful_tone_value = _tone_slider('Joyful: ')
    confident_tone_value = _tone_slider('Confident: ')
    urgent_tone_value = _tone_slider('Urgent: ')
    friendly_tone_value = _tone_slider('Friendly: ')
    optimistic_tone_value = _tone_slider('Optimistic: ')
    analytical_tone_value = _tone_slider('Analytical: ')
    casual_tone_value = _tone_slider('Casual: ')
215
+
216
+ Loxz_recom_box = st.checkbox('Select Loxz Recommended Tones for Optimal Output')
217
+
218
def create_charts():
    """Run the prediction for the current inputs and render the charts.

    Reads the module-level widget values (input_text, industry, campaign,
    target, the tone slider values and Loxz_recom_box). Always writes the
    predicted value and a bar chart of the slider values. When the
    recommendation box is ticked, also writes the prediction range and a
    bar chart of the recommended tones.
    """
    # Starting predictions
    tone_data = FunctionsModelSA_V1.load_data()
    _, tones = FunctionsModelSA_V1.convert_text_to_tone(input_text)

    campaign_val='campaign_type_'+ campaign
    industry_val='industry_'+ industry
    pred,lower,upper,_ = FunctionsModelSA_V1.prediction(tones,campaign_val,industry_val,target)
    _,best_target,best_target_tones = FunctionsModelSA_V1.find_max_cat(tone_data,target,industry_val,campaign_val)

    st.write('The Predicted Value is ' + str(pred))
    fig1 = go.Figure(go.Bar(
        name = 'Tone Levels',
        x=[joyful_tone_value, confident_tone_value, urgent_tone_value, friendly_tone_value, optimistic_tone_value, analytical_tone_value, casual_tone_value],
        y=['Joyful', 'Confident', 'Urgent', 'Friendly', 'Optimistic', 'Analytical', 'Casual'],
        orientation='h')
    )
    st.plotly_chart(fig1, use_container_width=True)

    if not Loxz_recom_box:
        return

    # find_max_cat returns the integer 0 in place of the tone values when no
    # row matches the selection; a bar trace cannot be built from a scalar.
    if isinstance(best_target_tones, int):
        st.warning('No recommendation is available for the selected industry and campaign type.')
        return

    recommended_changes=(best_target_tones)
    change=best_target-pred
    fig2 = go.Figure()
    fig2.add_trace(go.Bar(
        y=tone_labels,
        x=recommended_changes,
        name='Recommend changes',
        orientation='h',
        text=np.round(recommended_changes,3),
        width=.5,
        marker=dict(
            color='#e60f00',
            line=dict(color='rgba(58, 71, 80, 1.0)', width=1)))
    )
    fig2.update_traces(textfont_size=18, textposition="outside", cliponaxis=False)
    if target == 'Revenue_Per_Email':
        out = f"${round(change,2)}"
        st.write("The output will be between the range " + str(round(lower,2)) + ' and ' + str(round(upper,2)))
        st.write("The Predicted "+str(target) +" is "+ str(round(pred,2)))
    else:
        # One-decimal formatting keeps float noise from the multiplication
        # by 100 (e.g. 7.000000000000001) out of the displayed text.
        out = f"{round(change,2)*100:.1f}%"
        st.write("The output will be between the range " + f"{round(lower,2)*100:.1f}" + ' and ' + f"{round(upper,2)*100:.1f}")
        st.write("The Predicted "+str(target) +" is "+ f"{round(pred,2)*100:.1f}")
    fig2.update_layout(title_text=f'The following Changes will yield a {out} increase in {target}')

    st.plotly_chart(fig2, use_container_width=True)
277
+
278
+
279
# Run the analysis when the button is pressed; empty input is rejected.
if st.button('Generate Predictions'):
    start_time = time.time()
    if input_text != "":
        # Temporary status line, cleared once the charts are drawn.
        placeholder = st.empty()
        placeholder.text('Loading Data...')

        create_charts()

        placeholder.empty()
    else:
        st.error('Please enter a sentence!')
290
+
291
+
figures/ModelSA.png ADDED
main_app.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
# Page-level setup; these statements run when the module is imported.
st.set_page_config(layout="wide")

# CSS for the page body: background gradient and white text.
st.markdown(
    """
<style>
body {
background-image: linear-gradient(#2e7bcf,#2e7bcf);
color: white;
}
</style>
""",
    unsafe_allow_html=True,
)
models/SAMODEL ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74109f83ef03f7e9368cf615083c5fa691c88c4d3d6d52f54add5d03804a40b2
3
+ size 17568809
requirements.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas
2
+ numpy
3
+ colour
4
+ nltk
5
+ plotly
6
+ pytorch-lightning
7
+ vaderSentiment
8
+ transformers
9
+ urlextract
10
+ scipy
11
+ bs4
12
+ boto3
13
+ torch
14
+ bokeh==2.4.1
15
+ scikit-learn
16
+ streamlit-analytics
17
+ streamlit
18
+ s3fs
19
+ ipython
20
+ ipywidgets
21
+ sagemaker
22
+ ipyfilechooser
utils.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import s3fs
2
+ from io import StringIO
3
+ import pandas as pd
4
+ import boto3
5
+ from bokeh.models.widgets import Div
6
+ import streamlit as st
7
+
8
+
9
def get_files_from_aws(bucket,prefix):
    """
    Download a CSV object from S3 and return it as a DataFrame.

    bucket (STRING): bucket name
    prefix (STRING): key of the file inside the bucket

    Credentials are read from st.secrets ("aws_id" and "aws_key"); the
    object body is decoded as UTF-8 before parsing.
    """
    client = boto3.client(
        's3',
        aws_access_key_id = st.secrets["aws_id"],
        aws_secret_access_key = st.secrets["aws_key"],
    )
    response = client.get_object(Bucket=bucket,Key=prefix)
    csv_text = response['Body'].read().decode('utf-8')
    return pd.read_csv(StringIO(csv_text))
27
+
28
def url_button(button_name,url):
    """Render a button that opens *url* via window.open when clicked.

    On click, a Bokeh Div is rendered whose image element has an empty
    source and an onerror handler that calls window.open.
    """
    if not st.button(button_name):
        return
    open_script = """window.open('{url}')""".format(url=url)  # New tab or window
    markup = '<img src onerror="{}">'.format(open_script)
    st.bokeh_chart(Div(text=markup))
34
+