File size: 3,709 Bytes
6ed3479
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668d34d
6ed3479
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import numpy as np
import torch

# Select the compute device once at import time: CUDA when PyTorch can see a
# GPU, otherwise the CPU. The choice is logged so it shows up in the app output.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print('We will use the GPU:', torch.cuda.get_device_name(0))
else:
    print('No GPU available, using the CPU instead.')

import numpy as np
import gradio as gr
from transformers import BertTokenizer, AutoTokenizer
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import BertForSequenceClassification, AdamW, BertConfig
import random
# Tokenizer and classifier weights are both loaded from the same Hugging Face
# Hub checkpoint so that the vocabulary matches the fine-tuned model.
tokenizer = AutoTokenizer.from_pretrained('armansakif/bengali-fake-news')

model = BertForSequenceClassification.from_pretrained(
    "armansakif/bengali-fake-news",
    num_labels=2,  # Binary task: classify_news reads index 0 as 'fake', 1 as 'authentic'.
    output_attentions=False,  # Attention weights are not needed for inference.
    output_hidden_states=False,  # Hidden states are not needed for inference.
)
# classify_news moves its input tensors to `device`; the model has to live on
# the same device or the forward pass fails with a device-mismatch error when
# a GPU is available.
model.to(device)
# Inference only: switch off dropout and other training-time behaviour.
model.eval()

def classify_news(news):
    """Classify a piece of news text as fake or authentic.

    The text is tokenized (padded/truncated to 512 tokens), run through the
    module-level ``model`` without gradient tracking, and the two output
    logits are converted to probabilities with a softmax over the class axis.

    Args:
        news: Text of the news item to classify. ``None`` is treated as an
            empty string.

    Returns:
        A dict with the keys ``'fake'`` and ``'authentic'`` mapped to their
        probabilities as Python floats (class index 0 is ``'fake'``, index 1
        is ``'authentic'``).
    """
    labels = ['fake', 'authentic']

    if news is None:
        news = ''

    encoded = tokenizer(
        news,
        add_special_tokens=True,       # Add '[CLS]' and '[SEP]'.
        max_length=512,
        padding='max_length',          # Replaces the deprecated pad_to_max_length=True.
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',           # Batch of one: tensors of shape (1, 512).
    )

    # Send the inputs to wherever the model's weights actually are, rather
    # than to a separately chosen device, so the two can never disagree.
    model_device = next(model.parameters()).device
    input_ids = encoded['input_ids'].to(model_device)
    attention_mask = encoded['attention_mask'].to(model_device)

    model.eval()
    with torch.no_grad():
        # No labels are passed: there is no ground truth at inference time and
        # the loss is not needed.
        outputs = model(
            input_ids,
            token_type_ids=None,
            attention_mask=attention_mask,
        )
    # Without labels the first output element is the logits tensor; integer
    # indexing works for both tuple and ModelOutput return types.
    logits = outputs[0]

    # Explicit class axis; calling softmax without `dim` is deprecated.
    probs = torch.nn.functional.softmax(logits, dim=1)[0].cpu().tolist()

    print(probs[0])
    print(probs[1])

    predicted_index = int(torch.argmax(logits, dim=1)[0])
    result = 'Authentic News' if predicted_index else 'Fake News'
    print(result)

    return dict(zip(labels, probs))
# Web UI: a ten-line text box feeds classify_news, and the returned
# label -> probability dict is rendered as a two-class label widget.
demo = gr.Interface(
    classify_news,
    gr.Textbox(lines=10, placeholder="News here..."),
    gr.Label(num_top_classes=2),
)
demo.launch(inline=False)