Model_SA_Space

Runtime error

App Files Files Community

cd14

laumiulun committed on Aug 30, 2023

Commit

fbe47e5

0 Parent(s):

Duplicate from loxzdigital/Model_SA_Space

Browse files

Co-authored-by: Andy Lau <laumiulun@users.noreply.huggingface.co>

Files changed (19) hide show

.gitattributes +34 -0
.gitignore +3 -0
CI_RATES.csv +4 -0
FunctionsModelSA_V1.py +444 -0
README.md +13 -0
Rate_Models/Click-To-Open_Rates.sav +3 -0
Rate_Models/Conversion_rate.sav +3 -0
Rate_Models/Revenue_per_email.sav +3 -0
Rate_Models/bounce_rate_model.sav +3 -0
Rate_Models/click_trough_rate_model.sav +3 -0
Rate_Models/open_rate_model.sav +3 -0
Rate_Models/unsubscribe_rate_model.sav +3 -0
Tone_and_target.csv +3 -0
app.py +291 -0
figures/ModelSA.png +0 -0
main_app.py +15 -0
models/SAMODEL +3 -0
requirements.txt +22 -0
utils.py +34 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.sav filter=lfs diff=lfs merge=lfs -text
+models/* filter=lfs diff=lfs merge=lfs -text
+Tone_and_target.csv filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+__pycache__/
+/.DS_Store
+/.streamlit/

CI_RATES.csv ADDED Viewed

	@@ -0,0 +1,4 @@

+,model,2_5,5,10,25,75,90,95,97_5
+0,Click_To_Open_Rate,-0.04112196411968941,-0.03438777926058009,-0.027337482186956127,-0.014792473005118267,0.014543842510280375,0.028110496211978253,0.035499639337518096,0.042420791954972455
+1,Conversion_Rate,-0.01937036282646773,-0.016724064382768754,-0.01320846866885363,-0.006978657696956481,0.0067317653782765115,0.012849716838728757,0.016753552736137472,0.020409019120631026
+2,Revenue_Per_Email,-3.885196454402065,-3.1344678960009262,-2.500005024239302,-1.344952303324935,1.3157780163896504,2.602544034460352,3.2714811650866737,3.9553102784548777

FunctionsModelSA_V1.py ADDED Viewed

	@@ -0,0 +1,444 @@

+import s3fs
+import pandas as pd
+import numpy as np
+from numpy import arange
+from colour import Color
+import plotly.graph_objects as go
+from nltk import tokenize
+from IPython.display import Markdown
+from PIL import ImageColor
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+import nltk
+nltk.download('punkt')
+import email
+import codecs
+import pickle
+import string
+from scipy import spatial
+import re
+import pytorch_lightning as pl
+from bs4 import BeautifulSoup
+import ipywidgets as widgets
+from ipywidgets import FileUpload
+from urlextract import URLExtract
+from transformers import BertTokenizerFast as BertTokenizer, BertModel, BertConfig
+import torch.nn as nn
+import torch
+from ipywidgets import interact, Dropdown
+import boto3
+from sagemaker import get_execution_role
+from scipy import spatial
+from ipyfilechooser import FileChooser
+import random
+# Shared configuration for the tone model and the rate models.
+# BERT_MODEL_NAME is read by ToneTagger, LABEL_COLUMNS names the tone columns
+# built in convert_text_to_tone(), and rf_labels is the column list that
+# prediction() uses to lay out the feature row passed to a rate model.
+PARAMS={
+'BATCH_SIZE': 8,
+'MAX_TOKEN_COUNT':100,
+'BERT_MODEL_NAME':'google/bert_uncased_L-2_H-128_A-2'  ,
+'N_EPOCHS': 10,
+'n_classes':8,
+'LABEL_COLUMNS': ['label_analytical', 'label_casual', 'label_confident', 'label_friendly',
+       'label_joyful', 'label_optimistic', 'label_respectful',
+       'label_urgent'],
+'TEXTCOL': 'text',
+'rf_labels':['label_analytical', 'label_casual', 'label_confident',
+       'label_friendly', 'label_joyful', 'label_optimistic',
+       'label_respectful', 'label_urgent',
+       'industry_Academic and Education', 'industry_Energy',
+       'industry_Entertainment', 'industry_Finance and Banking',
+       'industry_Healthcare', 'industry_Hospitality', 'industry_Real Estate',
+       'industry_Retail', 'industry_Software and Technology',
+       'campaign_type_Abandoned_Cart', 'campaign_type_Engagement',
+       'campaign_type_Newsletter', 'campaign_type_Product_Announcement',
+       'campaign_type_Promotional', 'campaign_type_Review_Request',
+       'campaign_type_Survey', 'campaign_type_Transactional',
+       'campaign_type_Usage_and_Consumption', 'campaign_type_Webinar']
+}
+# Per-model offsets loaded from CI_RATES.csv at import time; prediction() adds
+# the '2_5' and '97_5' columns to a prediction to form its lower/upper bounds.
+CI_rates=pd.read_csv('CI_RATES.csv')
+### create file uploading widget
+def email_upload():
+    print("Please upload your email (In EML Format)")
+    upload = FileUpload(accept='.eml', multiple=True)
+    display(upload)
+    return upload
+def parse_email(uploaded_file):
+    check=[]
+    filename = list(uploaded_file.value.keys())[0]
+    email_body_str = codecs.decode(uploaded_file.value[filename]['content'], encoding="utf-8")
+    b=email.message_from_string(email_body_str)
+    for part in b.walk():
+        if part.get_content_type():
+            body = str(part.get_payload())
+            soup = BeautifulSoup(body)
+            paragraphs = soup.find_all('body')
+            for paragraph in paragraphs:
+                check.append(paragraph.text)
+            file="".join(check)
+    return file
+def text_clean(x,punct=True):
+    ### Light
+    x = x.lower() # lowercase everything
+    x = x.encode('ascii', 'ignore').decode()  # remove unicode characters
+    x = re.sub(r'https*\S+', ' ', x) # remove links
+    x = re.sub(r'http*\S+', ' ', x)
+    # cleaning up text
+    x = re.sub(r'\'\w+', ' ', x)
+    x = re.sub(r'\w*\d+\w*', ' ', x)
+    x = re.sub(r'\s{2,}', ' ', x)
+    x = re.sub(r'\s[^\w\s]\s', ' ', x)
+    ### Heavy
+    x = re.sub(r'@\S', ' ', x)
+    x = re.sub(r'#\S+', ' ', x)
+    x=x.replace('=',' ')
+    if(punct==True):
+        x = re.sub('[%s]' % re.escape(string.punctuation), ' ', x)
+    # remove single letters and numbers surrounded by space
+    x = re.sub(r'\s[a-z]\s|\s[0-9]\s', ' ', x)
+    clean=[' Â\x8a','\t','\n','Ã\x83','Â\x92','Â\x93','Â\x8a','Â\x95']
+    for y in clean:
+        x=x.replace(y,'')
+    return x
+####BERT MODEL LOAD REQUIRMENTS#########
+class ToneTagger(pl.LightningModule):
+    def __init__(self, n_classes: int, n_training_steps=None, n_warmup_steps=None):
+        super().__init__()
+        self.bert = BertModel.from_pretrained(PARAMS['BERT_MODEL_NAME'], return_dict=True)
+        self.classifier = nn.Linear(self.bert.config.hidden_size, n_classes)
+        self.n_training_steps = n_training_steps
+        self.n_warmup_steps = n_warmup_steps
+        self.criterion = nn.BCELoss()
+    def forward(self, input_ids, attention_mask):
+        output = self.bert(input_ids,attention_mask)
+        output = self.classifier(output.pooler_output)
+        output = torch.sigmoid(output)
+        return output
+# LOAD IN PRE TRAINED MODEL WITH WEIGHTS
+model=ToneTagger(8) # load up the model archetecture with 8 different tones
+model.load_state_dict(torch.load("models/SAMODEL")) # populate the weights of the model
+model.eval()
+def bert_tones(text_sentences,model):
+    """ This function takes in setences and the model cleaned them then predicts the bert tones"""
+    predictions=[]
+    text=[]
+    tokenizer = BertTokenizer.from_pretrained('google/bert_uncased_L-2_H-128_A-2')
+    for sent in text_sentences:
+        text.append(text_clean(sent,False))
+        cleaned_text=text_clean(sent)
+        encoding = tokenizer.encode_plus(
+                    cleaned_text,
+                    add_special_tokens=True,
+                    max_length=100,
+                    return_token_type_ids=False,
+                    padding="max_length",
+                    truncation=True,
+                    return_attention_mask=True,
+                    return_tensors='pt',
+                )
+        with torch.no_grad():
+            inputs=encoding['input_ids']
+            attention=encoding['attention_mask']
+            pred=model(inputs,attention)
+            pred=pred.cpu().numpy()
+            predictions.append(np.array(pred[0]))
+    return text,predictions
+def convert_text_to_tone(text,model=model,params=PARAMS):
+    """ This Function will convert the text to tone, it takes in the text with punctuations seperates it into senteces"""
+    data=[]
+    # Find the sentiment from vader sentiment analyzer (Not currently in use)
+    sid_obj = SentimentIntensityAnalyzer()
+    total_cleaned=text_clean(text)
+    sentiment_dict = sid_obj.polarity_scores(total_cleaned)#  Find the sentiment from
+    text_sentences=tokenize.sent_tokenize(text) #Find all the different sentences through the NLTK library
+    plain_text,predictions=bert_tones(text_sentences,model)
+    data.append([plain_text,sentiment_dict,predictions])
+    final=pd.DataFrame(data,columns=['text','sentiment','sentencetone'])
+#     print(final)
+    agg_tones=final['sentencetone'].apply(np.mean,axis=0)
+    tones=pd.DataFrame(agg_tones.tolist(),columns=params['LABEL_COLUMNS'])
+    return final,tones
+### This will be abstracted away to a more dynamic model
+brf='Rate_Models/bounce_rate_model.sav'
+BRM = pickle.load(open(brf, 'rb'))
+orf='Rate_Models/open_rate_model.sav'
+ORM = pickle.load(open(orf, 'rb'))
+urf='Rate_Models/unsubscribe_rate_model.sav'
+URM = pickle.load(open(urf, 'rb'))
+crf='Rate_Models/click_trough_rate_model.sav'
+CRM = pickle.load(open(crf, 'rb'))
+CV='Rate_Models/Conversion_rate.sav'
+ConM = pickle.load(open(CV, 'rb'))
+CTOR='Rate_Models/Click-To-Open_Rates.sav'
+CTORM = pickle.load(open(CTOR, 'rb'))
+RV='Rate_Models/Revenue_per_email.sav'
+RVM = pickle.load(open(RV, 'rb'))
+model_dict={'Open_Rate':ORM,
+'Click_Through_Rate': CRM,
+'Unsubscribe_Rate': URM,
+'Bounce_Rate':BRM,
+'Click_To_Open_Rate': CTORM,
+'Conversion_Rate': ConM,
+'Revenue_Per_Email':RVM}
+## Plot confidence interval
+def plot_CI(pred,lower,upper,scale_factor=0.5):
+    """This function plots the confidence intervals of your prediction
+    pred- The prediction varaible given from the Random Forest for the target variable
+    lower- The lower half of the prediction confidence interval
+    upper- The upper half of the confidence interval
+    scale_factor- This will modify the size of the graph """
+    title=f'The Predicted Value is {pred}'
+    fig = go.Figure()
+    fig.update_xaxes(showgrid=False)
+    fig.update_yaxes(showgrid=False,
+                     zeroline=True, zerolinecolor='black', zerolinewidth=3,
+                     showticklabels=False)
+    fig.update_layout(height=200, plot_bgcolor='white')
+    fig.add_trace(go.Scatter(
+        x=[pred], y=[0,0], mode='markers', marker_size=10,line=dict(color="red")
+    ))
+    fig.update_layout(xaxis_range=[0,upper+upper*scale_factor])
+    fig.update_layout(showlegend=False)
+    fig.add_vline(x=lower,annotation_text=f"{lower}",annotation_position="top")
+    fig.add_vline(x=upper,annotation_text=f"{upper}",annotation_position="top")
+    fig.add_vrect(lower,upper,fillcolor='red',opacity=0.25,annotation_text='95% CI',annotation_position="outside top")
+    fig.update_layout(title_text=title, title_x=0.5)
+    fig.show()
+def find_max_cat(df,target,industry,campaign):
+    #### Select entries with the matching industry and campaign (1 == True)
+    d=df[(df[campaign]==1) & (df[industry]==1)]
+    if(len(d)>0):
+        rec=df.loc[d[target].idxmax()][3:11]  ## Select the tone values for the best target values
+        return round(d[target].min(),3),round(d[target].max(),3),rec  ## select the top target variable value and return with the tones
+    else:
+        return 0,0,0
+def scale_values(val, tn):  ## val = slider value, tn = current tone value
+    val = tn*100
+    return val
+# Display names used as bar labels by the plotting helpers; the order mirrors
+# PARAMS['LABEL_COLUMNS'] (analytical ... urgent).
+tone_labels = ['Analytical', 'Casual', 'Confident', 'Friendly', 'Joyful', 'Optimistic', 'Respectful', 'Urgent']
+# ## Plot recommendations - ORIGINAL FROM V1.0
+# def recommend(tones,recommend_changes,change,target):
+#     ''' This function creates the recomended changes plots it takes it the tones, the changes and '''
+#     fig = go.Figure()
+#     fig.add_trace(go.Bar(
+#         y=tones.columns,
+#         x=tones.values[0],
+#         name='Current Tones',
+#         orientation='h',
+#        # text=np.round(tones.values[0],3),
+#         width=.9,
+#         marker=dict(
+#             color='#00e6b1',
+#             line=dict(color='rgba(58, 71, 80, 1.0)', width=3)
+#         )
+#     ))
+#     fig.add_trace(go.Bar(
+#         y=tones.columns,
+#         x=recommend_changes,
+#         name='Recommend changes',
+#         orientation='h',
+#         text=np.round(recommend_changes,3),
+#         width=.5,
+#         marker=dict(
+#             color='#e60f00',
+#             line=dict(color='rgba(58, 71, 80, 1.0)', width=3)
+#         )
+#     ))
+#     fig.update_traces(textfont_size=18, textposition="outside", cliponaxis=False)
+#     fig.update_layout(height=1000, plot_bgcolor='white')
+#     fig.update_layout(barmode='stack', yaxis={'categoryorder':'array','categoryarray': recommend_changes.sort_values(key=abs,ascending=True).index})
+#     fig.update_layout(title_text=f'The following Changes will yield a {round(change,3)} increase in {target}')
+#     fig.show()
+## Plot recommendations - MODIFIED
+def recommend(tones,recommend_changes,change,target):
+    ''' This function creates the recomended changes plots it takes it the tones, the changes and '''
+    fig = go.Figure()
+    fig.add_trace(go.Bar(
+#         y=tones.columns,
+        y=tone_labels,
+        x=recommend_changes,
+        name='Recommend changes',
+        orientation='h',
+        text=np.round(recommend_changes,3),
+        width=.5,
+        marker=dict(
+            color='#e60f00',
+            line=dict(color='rgba(58, 71, 80, 1.0)', width=1)
+        )
+    ))
+    fig.update_traces(textfont_size=18, textposition="outside", cliponaxis=False)
+#     fig.update_layout(height=1000, plot_bgcolor='white')
+#     fig.update_layout(barmode='stack', yaxis={'categoryorder':'array','categoryarray': recommend_changes.sort_values(key=abs,ascending=True).index})
+#     fig.update_layout(title_text=f'The following Changes will yield a {round(change,3)} increase in {target}')
+    if target == 'Revenue_Per_Email':
+        out = f"${round(change,2)}"
+    else:
+        out = f"{round(change,2)*100}%"
+    fig.update_layout(title_text=f'The following Changes will yield a {out} increase in {target}')
+    fig.show()
+def prediction(tones,campaign_val,industry_val,target):
+    model_val=pd.DataFrame(tones,columns=PARAMS['rf_labels']).fillna(0)
+    model_val.loc[0,campaign_val]=1
+    model_val.loc[0,industry_val]=1
+    model=model_dict[target]
+    pred=model.predict(model_val)[0]
+#     y_pred = regr.predict(X_test)
+#     r2_test = r2_score(y_test, y_pred)
+    CI=CI_rates[CI_rates['model']==target]
+    lower=pred+CI['2_5'].values[0]
+    higher=pred+CI['97_5'].values[0]
+    return pred,round(lower,3),round(higher,3),model
+## Plot recommendations for intensity changes
+def intensity_changes(tones,recommend_changes,change,target):
+    ''' This function creates a plot to show the change made to intensities and shows the resulting change in target rate '''
+    fig = go.Figure()
+    fig.add_trace(go.Bar(
+#         y=tones.columns,
+        y=tone_labels,
+        x=recommend_changes,
+        name='Recommend changes',
+        orientation='h',
+        text=np.round(recommend_changes,3),
+        width=.5,
+        marker=dict(
+            color='#00e6b1',
+            line=dict(color='rgba(58, 71, 80, 1.0)', width=1)
+        )
+    ))
+    fig.update_traces(textfont_size=18, textposition="outside", cliponaxis=False)
+    if change < 0:
+        if target == 'Revenue_Per_Email':
+            out = f"${round(change*(-1),2)}"
+        else:
+            out = f"{round(change*(-1),2)}%"
+        fig.update_layout(title_text=f'The following Changes will decrease the {target} by {out}')
+    elif change >= 0:
+        if target == 'Revenue_Per_Email':
+            out = f"${round(change,2)}"
+        else:
+            out = f"{round(change,2)*100}%"
+        fig.update_layout(title_text=f'The following Changes will increase the {target} by {out}')
+#     fig.update_layout(title_text=f'The changes made to the tone intensities')
+    fig.show()
+def load_data():
+    data_location='Tone_and_target.csv'
+    df=pd.read_csv(data_location)
+    df_unique = df.drop_duplicates()
+    df_unique = pd.get_dummies(df_unique, columns=['industry','campaign_type'])
+    df_data=df_unique.drop(columns=['Unnamed: 0','body'])
+    df_data=df_data.rename(columns={'Click-To-Open Rates':'Click_To_Open_Rate','Conversion Rate':'Conversion_Rate','Revenue Per email':'Revenue_Per_Email'})
+    return df_data
+def plot_table(sorted_setence_tuple):
+    """ Plots the bottom most table, takes in a list of tuples where the tuple is the sentence  the sentiment distance
+    from the best values """
+    sentences=list(zip(*sorted_setence_tuple))[0]
+    scores= list(zip(*sorted_setence_tuple))[1]
+    colors= list(zip(*sorted_setence_tuple))[2]
+    rbg_list=[]
+    for i in colors:
+        rbg_list.append('rgb'+str(i))
+    fig = go.Figure(data=[go.Table(
+    header=dict(values=['<b>Sentences</b>', '<b>Difference from Recommended Tone</b>'],
+                line_color = 'darkslategray',
+                fill_color = '#010405',
+                align = 'center',
+                font=dict(family="Metropolis",color='white', size=16)),
+    cells=dict(values=[sentences, # 1st column
+                       scores] , # 2nd column
+               line_color='darkslategray',
+               fill_color=[rbg_list],
+               align=['left','center'],
+               font=dict(family="Arial",size=12)))
+    ])
+    #fig.show()
+def corrections(best,df):
+    """This function finds the the difference between the tone of each sentence and the  best tone for the desired metric
+    best- tone values of the best email for the current categories
+    df- dataframe of the sentences of the uploaded email and the """
+    sentence_order=[]
+    colors=['#48f0c9','#6ef5d6','#94f7e1','#bbfaec','#e6fff9','#ffe7e6','#ffc3bf','#ffa099','#ff7c73','#ff584d'] #loxz green primary to Loxz light red
+    for i in range(len(df['sentencetone'][0])):
+        text=df['text'][0][i]
+        cur=df['sentencetone'][0][i]
+        cosine_distance= spatial.distance.cosine(best,cur)
+        distance=cosine_distance # Cosine distance
+        new_value = round(( (distance - 0) / (1 - 0) ) * (100 - 0) + 0) # for distance metric this is just normalizing the varaible
+        color_value=round(( (distance - 0) / (1 - 0) ) * (10  - 0) + 0) # Color whell value
+        col=colors[(color_value)]
+        rbg=ImageColor.getcolor(f'{col}', "RGB")
+        sentence_order.append((text,new_value,rbg))
+        sorted_sentences=sorted(sentence_order,key=lambda x: x[1],reverse=True)
+    plot_table(sorted_sentences)
+def read_file(fc):
+    with open(fc.selected) as file: # Use file to refer to the file object
+        data = file.read()
+    check=[]
+    b=email.message_from_string(data)
+    for part in b.walk():
+            if part.get_content_type():
+                body = str(part.get_payload())
+                soup = BeautifulSoup(body)
+                paragraphs = soup.find_all('body')
+                for paragraph in paragraphs:
+                    check.append(paragraph.text)
+                file="".join(check)
+    return file

README.md ADDED Viewed

	@@ -0,0 +1,13 @@

+---
+title: Model SA Space
+emoji: 🚀
+colorFrom: yellow
+colorTo: indigo
+sdk: streamlit
+sdk_version: 1.10.0
+app_file: app.py
+pinned: false
+duplicated_from: loxzdigital/Model_SA_Space
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

Rate_Models/Click-To-Open_Rates.sav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9cea028c663cce78104e961e1aa7f6481981d4574d81edcf63a4da416a41f65
+size 3050503

Rate_Models/Conversion_rate.sav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a12e25f7bf2b980f0ff449a692c8cc116ed33ab400ec7a3ede30bcc38cb7db99
+size 1235487

Rate_Models/Revenue_per_email.sav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6a932241992fd2655b373567cc28b74a861e97cf4e0555c5c8f303d861ca3639
+size 625803

Rate_Models/bounce_rate_model.sav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9edd3b1f3c0c4a199d370be5ebbac39729c14ba1c64baf7ed9e4b9b8b682bcab
+size 1490487

Rate_Models/click_trough_rate_model.sav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2bc77675873651fd9cdf2ce585b3673faaffe08ae8f07e06db05989cff253879
+size 3816121

Rate_Models/open_rate_model.sav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e2b1dac902f3081d83f2ce047d725d8a35c59d388993fd7aae3e66437bc25bc4
+size 1974887

Rate_Models/unsubscribe_rate_model.sav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:519c4094f5740288387e5f26097ea7c1d07f244801402268b9bc19962daae1c0
+size 1954791

Tone_and_target.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c329e909d4a78e975696ca39093b627da865b1efed061832222705eefd7c89e3
+size 32018412

app.py ADDED Viewed

	@@ -0,0 +1,291 @@

+from ast import arg
+import FunctionsModelSA_V1
+import numpy as np
+import streamlit as st
+import pandas as pd
+import PIL
+import time
+import main_app
+import utils
+from colour import Color
+import plotly.graph_objects as go
+from nltk import tokenize
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+import nltk
+nltk.download('punkt')
+import codecs
+import pickle
+import string
+from scipy import spatial
+import pytorch_lightning as pl
+from urlextract import URLExtract
+from transformers import BertTokenizerFast as BertTokenizer, BertModel, BertConfig
+import streamlit_analytics
+from FunctionsModelSA_V1 import *
+#from model_V1 import *
+def table_data():
+    # creating table data
+    field = [
+        'Data Scientist',
+        'Dataset',
+        'Algorithm',
+        'Framework',
+        'Ensemble',
+        'Domain',
+        'Model Size'
+    ]
+    data = [
+        'Jeffrey Ott',
+        'Internal + Campaign monitor',
+        'BERT_Uncased_L_2_H_128_A-2, Single Linear Layer Neural Network, Random Forest',
+        'Pytorch',
+        'Bootstrapping',
+        'NLP Text Classification',
+        '16.8 MB'
+    ]
+    data = {
+        'Field':field,
+        'Data':data
+    }
+    df = pd.DataFrame.from_dict(data)
+    return df
+def add_bg_from_url():
+    st.markdown(
+         f"""
+         <style>
+         .stApp {{
+             background-image: linear-gradient(#45eff5,#1C8D99);
+             background-attachment: fixed;
+             background-size: cover
+         }}
+         </style>
+         """,
+         unsafe_allow_html=True
+     )
+add_bg_from_url()
+#linear-gradient(#0A3144,#126072,#1C8D99);
+#add whitespace to fix scroll
+def WHITE_SPACE(lines):
+    for x in range(lines):
+        st.write('&nbsp;')
+# Sidebar: model description, model facts table, external links and credits.
+with st.sidebar:
+    with st.expander('Model Description', expanded=False):
+        img = PIL.Image.open("figures/ModelSA.png")
+        st.image(img)
+        # NOTE(review): the text below mentions a softmax layer, while
+        # ToneTagger.forward applies a sigmoid — confirm which is intended.
+        st.markdown('The model seeks to solve the problem of how to set the tone for an email campaign appropriately. This 5th generation model from the Loxz family uses state-of-the-art NLP to determine and predict the optimized sentiment of an email using tokenization techniques. The model will analyze any email text “shape” and help the user understand the tone and how that tone correlates with the metric of interest. We applied a pre-trained tiny BERT model to vectorize the email campaign text body, then a softmax dense layer was added to get the multi-label classifications. Email metrics are provided prior to campaign launch, and the model determines the optimal engagement rate based on several factors, including inputs by the campaign engineer.')
+    with st.expander('Model Information', expanded=False):
+        # CSS that hides the index column of the table rendered below.
+        hide_table_row_index = """
+            <style>
+            thead tr th:first-child {display:none}
+            tbody th {display:none}
+            </style>
+            """
+        st.markdown(hide_table_row_index, unsafe_allow_html=True)
+        st.table(table_data())
+    utils.url_button('Model Homepage','https://loxz.com/#/models/SA')
+    # url_button('Full Report','https://resources.loxz.com/reports/realtime-ml-character-count-model')
+    utils.url_button('Amazon Market Place','https://aws.amazon.com/marketplace')
+    # Padding rows so the caption sits lower in the sidebar.
+    WHITE_SPACE(15)
+    st.caption("All models owned by Loxz Digital")
+# Choices offered in the parameter select boxes. create_charts() prefixes the
+# chosen industry with 'industry_' and the campaign with 'campaign_type_' to
+# form the indicator column names.
+industry_lists = ['Software and Technology', 'Academic and Education',
+           'Entertainment', 'Finance and Banking', 'Hospitality',
+           'Real Estate', 'Retail', 'Energy', 'Healthcare']
+campaign_types = ['Webinar', 'Engagement', 'Product_Announcement', 'Promotional',
+           'Newsletter', 'Abandoned_Cart', 'Review_Request', 'Survey',
+           'Transactional', 'Usage_and_Consumption']
+target_variables = ['Conversion_Rate','Click_To_Open_Rate','Revenue_Per_Email']
+region_variables = ['Africa', 'America', 'Asia', 'Europe', 'Oceania']
+# Page header and a row of four static captions.
+st.markdown("#### Sentiment Analysis: Email Industry v1.2")
+stats_col1, stats_col2, stats_col3, stats_col4 = st.columns([1,1,1,1])
+with stats_col1:
+    st.caption("Verified: Production")
+    #st.metric(label="Verified", value= "Production")
+with stats_col2:
+    st.caption("Accuracy: 85%")
+    #st.metric(label="Accuracy", value= "85%")
+with stats_col3:
+    st.caption("Speed: 3.86 ms")
+    #st.metric(label="Speed", value="3.86 ms")
+with stats_col4:
+    st.caption("Industry: Email")
+#st.metric(label="Industry", value="Email")
+# Email text to analyse; read by create_charts() and by the button handler.
+input_text = st.text_area("Please enter your email text here: ", height=50)
+with st.expander('Please select your parameters: '):
+    with streamlit_analytics.track('test123'):
+        # Defaults by index: 'Retail', 'Click_To_Open_Rate', 'Survey', 'America'.
+        industry = st.selectbox(
+            'Please select your industry',
+            industry_lists,
+            index=6
+        )
+        target = st.selectbox(
+            'Please select your target variable',
+            target_variables,
+            index=1
+        )
+        campaign = st.selectbox(
+            'Please select your campaign type',
+            campaign_types,
+            index=7
+        )
+        # NOTE(review): `region` is not read anywhere in the visible code.
+        region = st.selectbox(
+            'Select your target region to generate the email with a more appropriate tone:',
+            region_variables,
+            index=1
+        )
+# Seven tone sliders, each starting at 0 and echoed back below the control.
+# Their values are plotted by create_charts(); there is no slider for the
+# 'Respectful' tone. Only `value` is given, so the slider range is whatever
+# Streamlit uses by default (presumably 0-100 — confirm).
+with st.expander('Adjust your tone intensities for your preference: '):
+#'Joyful', 'Confident', 'Urgent', 'Friendly', 'Optimistic', 'Analytical', 'Casual'
+    joyful_tone_value = st.slider(
+        'Joyful: ',
+        value = 0
+    )
+    st.write('Joyful: ', joyful_tone_value)
+    confident_tone_value = st.slider(
+        'Confident: ',
+        value = 0
+    )
+    st.write('Confident: ', confident_tone_value)
+    urgent_tone_value = st.slider(
+        'Urgent: ',
+        value = 0
+    )
+    st.write('Urgent: ', urgent_tone_value)
+    friendly_tone_value = st.slider(
+        'Friendly: ',
+        value = 0
+    )
+    st.write('Friendly: ', friendly_tone_value)
+    optimistic_tone_value = st.slider(
+        'Optimistic: ',
+        value = 0
+    )
+    st.write('Optimistic: ', optimistic_tone_value)
+    analytical_tone_value = st.slider(
+        'Analytical: ',
+        value = 0
+    )
+    st.write('Analytical: ', analytical_tone_value)
+    casual_tone_value = st.slider(
+        'Casual: ',
+        value = 0
+    )
+    st.write('Casual: ', casual_tone_value)
+# When ticked, create_charts() also draws the recommended-tones chart.
+Loxz_recom_box = st.checkbox('Select Loxz Recommended Tones for Optimal Output')
+def create_charts():
+    # Starting predictions
+    bucket='emailcampaignmodeldata'
+    # file_key = 'fullEmailBody/fullemailtextbody_labeled_3rates_8tones_20220524.csv'
+    # email_data = utils.get_files_from_aws(bucket,file_key)
+    tone_key = 'ModelSADataSets/Tone_and_target.csv'
+    tone_data = FunctionsModelSA_V1.load_data()
+    test_predictions,tones = FunctionsModelSA_V1.convert_text_to_tone(input_text)
+    # st.dataframe(test_predictions)
+    # st.dataframe(tones)
+    campaign_val='campaign_type_'+ campaign
+    industry_val='industry_'+ industry
+    pred,lower,upper,model = FunctionsModelSA_V1.prediction(tones,campaign_val,industry_val,target)
+    worst_target,best_target,best_target_tones = FunctionsModelSA_V1.find_max_cat(tone_data,target,industry_val,campaign_val)
+    #best_target, best_target_tones
+    #FunctionsModelSA_V1.plot_CI(pred,lower,upper)
+    st.write('The Predicted Value is ' + str(pred))
+    fig1 = go.Figure(go.Bar(
+        name = 'Tone Levels',
+        x=[joyful_tone_value, confident_tone_value, urgent_tone_value, friendly_tone_value, optimistic_tone_value, analytical_tone_value, casual_tone_value],
+        y=['Joyful', 'Confident', 'Urgent', 'Friendly', 'Optimistic', 'Analytical', 'Casual'],
+        orientation='h')
+    )
+    st.plotly_chart(fig1, use_container_width=True)
+    #if((best_target!=0) and (pred<best_target)):
+    if Loxz_recom_box == True:
+        recommended_changes=(best_target_tones)
+        change=best_target-pred
+        #recommend(tones,recommended_changes,change,target)
+        fig2 = go.Figure()
+        fig2.add_trace(go.Bar(
+            y=tone_labels,
+            x=recommended_changes,
+            name='Recommend changes',
+            orientation='h',
+            text=np.round(recommended_changes,3),
+            width=.5,
+            marker=dict(
+            color='#e60f00',
+            line=dict(color='rgba(58, 71, 80, 1.0)', width=1)))
+        )
+        fig2.update_traces(textfont_size=18, textposition="outside", cliponaxis=False)
+        if target == 'Revenue_Per_Email':
+            out = f"${round(change,2)}"
+            st.write("The output will be between the range " + str(round(lower,2)) + ' and ' + str(round(upper,2)))
+            st.write("The Predicted "+str(target) +" is "+ str(round(pred,2)))
+        else:
+            out = f"{round(change,2)*100}%"
+            st.write("The output will be between the range " + str(round(lower,2) * 100) + ' and ' + str(round(upper,2) *100))
+            st.write("The Predicted "+str(target) +" is "+ str(round(pred,2)*100))
+        fig2.update_layout(title_text=f'The following Changes will yield a {out} increase in {target}')
+        st.plotly_chart(fig2, use_container_width=True)
+        #FunctionsModelSA_V1.corrections(best_target_tones,test_predictions))
+if st.button('Generate Predictions'):
+    start_time = time.time()
+    if input_text == "":
+        st.error('Please enter a sentence!')
+    else:
+        placeholder = st.empty()
+        placeholder.text('Loading Data...')
+        create_charts()
+        placeholder.empty()

figures/ModelSA.png ADDED Viewed

main_app.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import streamlit as st
+st.set_page_config(layout="wide")
+st.markdown(
+    """
+<style>
+body {
+    background-image: linear-gradient(#2e7bcf,#2e7bcf);
+    color: white;
+}
+</style>
+""",
+    unsafe_allow_html=True,
+)

models/SAMODEL ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:74109f83ef03f7e9368cf615083c5fa691c88c4d3d6d52f54add5d03804a40b2
+size 17568809

requirements.txt ADDED Viewed

	@@ -0,0 +1,22 @@

+pandas
+numpy
+colour
+nltk
+plotly
+pytorch-lightning
+vaderSentiment
+transformers
+urlextract
+scipy
+bs4
+boto3
+torch
+bokeh==2.4.1
+scikit-learn
+streamlit-analytics
+streamlit
+s3fs
+ipython
+ipywidgets
+sagemaker
+ipyfilechooser

utils.py ADDED Viewed

	@@ -0,0 +1,34 @@

+# import s3fs
+from io import StringIO
+import pandas as pd
+import boto3
+from bokeh.models.widgets import Div
+import streamlit as st
+def get_files_from_aws(bucket,prefix):
+    """
+        get files from aws s3 bucket
+    bucket (STRING): bucket name
+    prefix (STRING): file location in s3 bucket
+    """
+    s3_client = boto3.client('s3',
+        aws_access_key_id = st.secrets["aws_id"],
+        aws_secret_access_key = st.secrets["aws_key"])
+    file_obj = s3_client.get_object(Bucket=bucket,Key=prefix)
+    body = file_obj['Body']
+    string = body.read().decode('utf-8')
+    df = pd.read_csv(StringIO(string))
+    return df
+def url_button(button_name,url):
+    if st.button(button_name):
+        js = """window.open('{url}')""".format(url=url) # New tab or window
+        html = '<img src onerror="{}">'.format(js)
+        div = Div(text=html)
+        st.bokeh_chart(div)