epicDev123 committed
Commit f17c7fd · verified · 1 Parent(s): 72bfaa9

Create app.py

Files changed (1)
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import DataLoader, Dataset
+from sklearn.model_selection import train_test_split
+from transformers import BertTokenizer
+
+# Custom Dataset class for text classification
+class TextDataset(Dataset):
+    def __init__(self, texts, labels, tokenizer, max_length=512):
+        self.texts = texts
+        self.labels = labels
+        self.tokenizer = tokenizer
+        self.max_length = max_length
+
+    def __len__(self):
+        return len(self.texts)
+
+    def __getitem__(self, idx):
+        text = self.texts[idx]
+        label = self.labels[idx]
+
+        # Tokenize the text (any tokenizer works; here we use the BERT tokenizer)
+        encoding = self.tokenizer(text, truncation=True, padding='max_length', max_length=self.max_length, return_tensors='pt')
+        input_ids = encoding['input_ids'].squeeze(0)  # Drop the batch dimension added by return_tensors='pt'
+
+        return {
+            'input_ids': input_ids,
+            'labels': torch.tensor(label, dtype=torch.long)
+        }
+
+# A simple custom model (feed-forward NN for classification)
+class SimpleNN(nn.Module):
+    def __init__(self, vocab_size, hidden_size, output_size):
+        super(SimpleNN, self).__init__()
+        self.embedding = nn.Embedding(vocab_size, hidden_size)
+        self.fc1 = nn.Linear(hidden_size, 128)
+        self.fc2 = nn.Linear(128, output_size)
+        self.relu = nn.ReLU()
+
+    def forward(self, input_ids):
+        embedded = self.embedding(input_ids)
+        x = embedded.mean(dim=1)  # Simplified pooling (averaging the token embeddings)
+        x = self.relu(self.fc1(x))
+        return self.fc2(x)  # Return raw logits; nn.CrossEntropyLoss applies log-softmax internally
+
+# Example: sample dataset
+texts = ["I love programming.", "I hate bugs.", "Python is great.", "I enjoy learning."]
+labels = [1, 0, 1, 1]  # For example, 1 for positive sentiment, 0 for negative
+
+# Tokenizer (any tokenizer works; here we use the pretrained BERT tokenizer)
+tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
+
+# Split into training and validation sets
+train_texts, val_texts, train_labels, val_labels = train_test_split(texts, labels, test_size=0.2)
+
+# Create datasets and dataloaders
+train_dataset = TextDataset(train_texts, train_labels, tokenizer)
+val_dataset = TextDataset(val_texts, val_labels, tokenizer)
+
+train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
+val_loader = DataLoader(val_dataset, batch_size=2)
+
+# Initialize the model, optimizer, and loss function
+model = SimpleNN(vocab_size=30522, hidden_size=256, output_size=2)  # output_size=2 for binary classification; 30522 is the BERT vocab size
+optimizer = optim.Adam(model.parameters(), lr=0.001)
+criterion = nn.CrossEntropyLoss()
+
+# Training loop
+for epoch in range(3):  # 3 epochs for example
+    model.train()
+    for batch in train_loader:
+        optimizer.zero_grad()
+
+        input_ids = batch['input_ids']
+        labels = batch['labels']
+
+        # Forward pass
+        outputs = model(input_ids)
+        loss = criterion(outputs, labels)
+
+        # Backward pass
+        loss.backward()
+        optimizer.step()
+
+    print(f"Epoch {epoch + 1}, Loss: {loss.item()}")
+
+# Save the trained model
+torch.save(model.state_dict(), 'custom_model.pth')
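
Note: the committed file builds val_loader but never evaluates on it. A minimal evaluation sketch (not part of the commit), assuming the names defined in app.py above (model, val_loader) are in scope, could look like this:

# Evaluation sketch, assuming model and val_loader from app.py are already defined
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for batch in val_loader:
        logits = model(batch['input_ids'])   # raw logits, shape (batch_size, 2)
        preds = logits.argmax(dim=1)         # predicted class index per example
        correct += (preds == batch['labels']).sum().item()
        total += batch['labels'].size(0)
print(f"Validation accuracy: {correct / total:.2%}")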