Spaces:
Runtime error
Runtime error
File size: 4,659 Bytes
c5ae5ff 1635166 9b69fbc 1635166 9b69fbc 1635166 9b69fbc 2d6aba9 9b69fbc c5ae5ff 1635166 c5ae5ff 9b69fbc 1635166 9b69fbc 1635166 9b69fbc 8abdd11 c5ae5ff 9b69fbc 01b37c2 c5ae5ff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 |
import gradio as gr
from transformers import BertModel, BertConfig, BertTokenizer
import torch
import torch.nn as nn
import torch.nn.functional as F
import huggingface_hub
from huggingface_hub import hf_hub_download
import hazm
from cleantext import clean
import regex as re
# Hub repository that hosts the fine-tuned sentiment weights.
# This id used to be assigned to ``huggingface_hub.Repository``, which
# overwrote that library attribute with a plain string for the whole process
# (and for any other code importing huggingface_hub).  A module constant
# records the id without touching the library.
REPO_ID = 'zArabi/Persian-Sentiment-Analysis'
def cleanhtml(raw_html):
    """Return *raw_html* with every ``<...>`` tag-like span removed.

    The match is non-greedy, so each span ends at the first ``>`` that
    follows a ``<``; text outside such spans is returned unchanged.
    """
    return re.sub('<.*?>', '', raw_html)
# Emoji, pictographs, dingbats and invisible directional/format marks that
# ``cleaning`` strips.  Compiled once at import time instead of on every call.
# NOTE(review): the U+24C2..U+1F251 range is very wide and also spans CJK and
# Arabic presentation-form code points; it is kept as-is to preserve behaviour.
_WEIRD_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # flags (iOS)
    "\U00002702-\U000027B0"
    "\U000024C2-\U0001F251"
    "\U0001f926-\U0001f937"
    "\U00010000-\U0010ffff"
    "\u200d"
    "\u2640-\u2642"
    "\u2600-\u2B55"
    "\u23cf"
    "\u23e9"
    "\u231a"
    "\u3030"
    "\ufe0f"
    "\u2069"
    "\u2066"
    # U+200C is deliberately NOT in the class (it was commented out in the
    # original list) - presumably because Persian spelling relies on it.
    "\u2068"
    "\u2067"
    "]+",
    flags=re.UNICODE,
)

# Any run of whitespace; raw string so ``\s`` is not an invalid str escape.
_WHITESPACE_RUN = re.compile(r"\s+")

# Lazily-created hazm normalizer shared by all ``cleaning`` calls.
_NORMALIZER = None


def _get_normalizer():
    """Return the shared ``hazm.Normalizer``, creating it on first use."""
    global _NORMALIZER
    if _NORMALIZER is None:
        _NORMALIZER = hazm.Normalizer()
    return _NORMALIZER


def cleaning(text):
    """Normalize a raw comment before it is tokenized.

    Steps, in order:
      1. strip surrounding whitespace;
      2. run clean-text's ``clean`` (unicode fix, lower-casing, line breaks
         removed, URLs / e-mails / phone numbers / currency symbols replaced
         by the empty string; numbers, digits and punctuation are kept);
      3. remove ``<...>`` spans via ``cleanhtml``;
      4. apply the hazm normalizer;
      5. delete characters matched by ``_WEIRD_PATTERN``;
      6. delete ``#`` and collapse every whitespace run to a single space.

    Args:
        text: the raw input string.

    Returns:
        The cleaned string.
    """
    text = text.strip()

    # regular cleaning
    # https://pypi.org/project/clean-text/ >> works well for eng and de languages
    text = clean(
        text,
        fix_unicode=True,
        to_ascii=False,
        lower=True,
        no_line_breaks=True,
        no_urls=True,
        no_emails=True,
        no_phone_numbers=True,
        no_numbers=False,
        no_digits=False,
        no_currency_symbols=True,
        no_punct=False,  # keep the punctuation
        replace_with_url="",
        replace_with_email="",
        replace_with_phone_number="",
        replace_with_number="",
        replace_with_digit="0",
        replace_with_currency_symbol="",
    )

    # cleaning htmls
    text = cleanhtml(text)

    # normalizing > https://github.com/sobhe/hazm
    text = _get_normalizer().normalize(text)

    # removing weird patterns (emoji and invisible marks)
    text = _WEIRD_PATTERN.sub('', text)

    # removing hashtags' '#' sign and extra spaces
    text = text.replace("#", "")
    text = _WHITESPACE_RUN.sub(" ", text)
    return text
class SentimentModel(nn.Module):
    """BERT encoder followed by dropout and a linear classification head.

    ``config`` supplies ``hidden_size`` (input width of the head) and
    ``num_labels`` (output width).  The encoder weights are loaded from the
    checkpoint named by the module-level ``modelName``.
    """

    def __init__(self, config):
        super().__init__()
        # return_dict=False makes the encoder return a plain tuple, which
        # ``forward`` unpacks positionally.
        self.bert = BertModel.from_pretrained(modelName, return_dict=False)
        self.dropout = nn.Dropout(0.3)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)

    def forward(self, input_ids, attention_mask):
        """Return the classifier logits for the given encoded inputs."""
        # Only the second element of the encoder output is used.
        _sequence_output, pooled = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )
        return self.classifier(self.dropout(pooled))
# --- Model / tokenizer setup (runs once at import time) ---------------------

# Base checkpoint used for both the encoder weights and the tokenizer.
modelName = 'HooshvareLab/bert-fa-base-uncased'

# Class order defines the label ids: index i <-> class_names[i].
class_names = ['negative', 'neutral', 'positive']
label2id = {label: i for i, label in enumerate(class_names)}
id2label = {v: k for k, v in label2id.items()}

config = BertConfig.from_pretrained(
    modelName,
    num_labels=len(class_names),
    id2label=id2label,
    label2id=label2id,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fine-tuned 3-class weights, fetched from the Hub.
path = 'HooshvareLab-bert-fa-base-uncased-3class.bin'
downloadedModelDict = hf_hub_download(
    repo_id="zArabi/Persian-Sentiment-Analysis",
    filename=path,
)

loaded_model = SentimentModel(config=config).to(device)
# NOTE(review): torch.load unpickles the downloaded file; consider passing
# weights_only=True if the installed torch version supports it.
loaded_model.load_state_dict(torch.load(downloadedModelDict, map_location="cpu"))
# Inference only: switch to eval mode so the Dropout(0.3) layer is disabled.
# Without this the same sentence can receive different scores on every call.
loaded_model.eval()

tokenizer = BertTokenizer.from_pretrained(modelName)
# Inputs are truncated / padded to this many tokens in ``predict``.
max_len = 512
def predict(text):
    """Score one sentence and return its class probabilities.

    The text is passed through ``cleaning``, encoded with the module-level
    ``tokenizer`` (truncated / padded to ``max_len`` tokens) and fed to
    ``loaded_model``; the logits are turned into probabilities with softmax.

    Args:
        text: the raw input sentence.

    Returns:
        A dict mapping each entry of ``class_names`` to its probability,
        rounded to 3 decimal places.
    """
    text = cleaning(text)
    encoding = tokenizer.encode_plus(
        text,
        max_length=max_len,
        truncation=True,
        padding="max_length",
        add_special_tokens=True,  # Add '[CLS]' and '[SEP]'
        return_token_type_ids=True,
        return_attention_mask=True,
        return_tensors='pt',  # Return PyTorch tensors
    )
    input_ids = encoding["input_ids"].to(device)
    attention_mask = encoding["attention_mask"].to(device)

    # Dropout must be off at inference time, otherwise scores are random.
    if loaded_model.training:
        loaded_model.eval()

    # No gradients are needed for prediction; skipping autograd bookkeeping
    # saves memory and time on every request.
    with torch.no_grad():
        logits = loaded_model(input_ids, attention_mask)
        # Single-sentence batch: take row 0 as plain Python floats.
        probs = F.softmax(logits, dim=1)[0].tolist()

    return {name: round(prob, 3) for name, prob in zip(class_names, probs)}
# Build and start the web UI: one text box in, a label widget showing the
# three class probabilities out.
# ``gr.Label`` replaces the legacy ``gr.outputs.Label``: the ``gr.outputs``
# namespace is not available in current Gradio releases, and the input side
# already uses the top-level component style (``gr.Textbox``).
demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(label="Explore your sentence!", lines=2, placeholder="Type Here..."),
    outputs=gr.Label(num_top_classes=3),
    title="How are feeling?!!",
)
demo.launch()