leandrumartin committed
Commit a98adb9 · verified · 1 Parent(s): d55a5fe

Upload app.py with huggingface_hub
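For reference, an upload like this one can be produced with the Hub client's upload_file API. A minimal sketch (the target Space id and token handling are assumptions, not read from this commit):

from huggingface_hub import HfApi

api = HfApi()  # picks up the token from HF_TOKEN or the cached login
api.upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id="leandrumartin/<space-name>",  # hypothetical Space id
    repo_type="space",
    commit_message="Upload app.py with huggingface_hub",
)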

Files changed (1)
  1. app.py +468 -0
app.py ADDED
@@ -0,0 +1,468 @@
+ import gradio as gr
+ import torch
+ from PIL import Image
+ from torchvision import transforms
+ import numpy as np
+ import random
+ import torch.nn as nn
+ from torch.utils.data import DataLoader, Dataset
+ from torchvision.models.resnet import ResNet50_Weights
+ from typing import Type, Any, Callable, Union, List, Optional
+ from torch import Tensor
+ from huggingface_hub import hf_hub_download
+
+ # Download the fine-tuned Clothing1M checkpoint from the Hugging Face Hub.
+ username = "leandrumartin"
+ model_repo = "assignment2model"
+ model_path = hf_hub_download(repo_id=f"{username}/{model_repo}", filename="clothing1m.pth")
+
+ # The 14 Clothing1M class labels, indexed in the same order as the model's outputs.
+ CATEGORY_NAMES = ['T-Shirt', 'Shirt', 'Knitwear', 'Chiffon', 'Sweater', 'Hoodie', 'Windbreaker', 'Jacket', 'Downcoat', 'Suit', 'Shawl', 'Dress', 'Vest', 'Underwear']
+
+ def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
+     """3x3 convolution with padding"""
+     return nn.Conv2d(
+         in_planes,
+         out_planes,
+         kernel_size=3,
+         stride=stride,
+         padding=dilation,
+         groups=groups,
+         bias=False,
+         dilation=dilation,
+     )
+
+
+ def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
+     """1x1 convolution"""
+     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
+
+
+ class BasicBlock(nn.Module):
+     expansion: int = 1
+
+     def __init__(
+         self,
+         inplanes: int,
+         planes: int,
+         stride: int = 1,
+         downsample: Optional[nn.Module] = None,
+         groups: int = 1,
+         base_width: int = 64,
+         dilation: int = 1,
+         norm_layer: Optional[Callable[..., nn.Module]] = None,
+     ) -> None:
+         super().__init__()
+         if norm_layer is None:
+             norm_layer = nn.BatchNorm2d
+         if groups != 1 or base_width != 64:
+             raise ValueError("BasicBlock only supports groups=1 and base_width=64")
+         if dilation > 1:
+             raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
+         # Both self.conv1 and self.downsample layers downsample the input when stride != 1
+         self.conv1 = conv3x3(inplanes, planes, stride)
+         self.bn1 = norm_layer(planes)
+         self.relu = nn.ReLU(inplace=True)
+         self.conv2 = conv3x3(planes, planes)
+         self.bn2 = norm_layer(planes)
+         self.downsample = downsample
+         self.stride = stride
+
+     def forward(self, x: Tensor) -> Tensor:
+         identity = x
+
+         out = self.conv1(x)
+         out = self.bn1(out)
+         out = self.relu(out)
+
+         out = self.conv2(out)
+         out = self.bn2(out)
+
+         if self.downsample is not None:
+             identity = self.downsample(x)
+
+         out += identity
+         out = self.relu(out)
+
+         return out
+
+
+ class Bottleneck(nn.Module):
+     # Bottleneck in torchvision places the stride for downsampling at the 3x3 convolution (self.conv2)
+     # while the original implementation places the stride at the first 1x1 convolution (self.conv1),
+     # per "Deep Residual Learning for Image Recognition" https://arxiv.org/abs/1512.03385.
+     # This variant is also known as ResNet V1.5 and improves accuracy according to
+     # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.
+
+     expansion: int = 4
+
+     def __init__(
+         self,
+         inplanes: int,
+         planes: int,
+         stride: int = 1,
+         downsample: Optional[nn.Module] = None,
+         groups: int = 1,
+         base_width: int = 64,
+         dilation: int = 1,
+         norm_layer: Optional[Callable[..., nn.Module]] = None,
+     ) -> None:
+         super().__init__()
+         if norm_layer is None:
+             norm_layer = nn.BatchNorm2d
+         width = int(planes * (base_width / 64.0)) * groups
+         # Both self.conv2 and self.downsample layers downsample the input when stride != 1
+         self.conv1 = conv1x1(inplanes, width)
+         self.bn1 = norm_layer(width)
+         self.conv2 = conv3x3(width, width, stride, groups, dilation)
+         self.bn2 = norm_layer(width)
+         self.conv3 = conv1x1(width, planes * self.expansion)
+         self.bn3 = norm_layer(planes * self.expansion)
+         self.relu = nn.ReLU(inplace=True)
+         self.downsample = downsample
+         self.stride = stride
+
+     def forward(self, x: Tensor) -> Tensor:
+         identity = x
+
+         out = self.conv1(x)
+         out = self.bn1(out)
+         out = self.relu(out)
+
+         out = self.conv2(out)
+         out = self.bn2(out)
+         out = self.relu(out)
+
+         out = self.conv3(out)
+         out = self.bn3(out)
+
+         if self.downsample is not None:
+             identity = self.downsample(x)
+
+         out += identity
+         out = self.relu(out)
+
+         return out
+
+
+ class ResNet(nn.Module):
+     def __init__(
+         self,
+         block: Type[Union[BasicBlock, Bottleneck]],
+         layers: List[int],
+         num_classes: int = 1000,
+         show: bool = False,
+         zero_init_residual: bool = False,
+         groups: int = 1,
+         width_per_group: int = 64,
+         replace_stride_with_dilation: Optional[List[bool]] = None,
+         norm_layer: Optional[Callable[..., nn.Module]] = None,
+     ) -> None:
+         super().__init__()
+         if norm_layer is None:
+             norm_layer = nn.BatchNorm2d
+         self._norm_layer = norm_layer
+
+         self.show = show
+         self.inplanes = 64
+         self.dilation = 1
+         if replace_stride_with_dilation is None:
+             # each element in the tuple indicates if we should replace
+             # the 2x2 stride with a dilated convolution instead
+             replace_stride_with_dilation = [False, False, False]
+         if len(replace_stride_with_dilation) != 3:
+             raise ValueError(
+                 "replace_stride_with_dilation should be None "
+                 f"or a 3-element tuple, got {replace_stride_with_dilation}"
+             )
+         self.groups = groups
+         self.base_width = width_per_group
+         self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
+         self.bn1 = norm_layer(self.inplanes)
+         self.relu = nn.ReLU(inplace=True)
+         self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+         self.layer1 = self._make_layer(block, 64, layers[0])
+         self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])
+         self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])
+         self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2])
+         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+         self.fc = nn.Linear(512 * block.expansion, num_classes)
+         # self.fc1 = nn.Linear(512 * block.expansion, 512)
+         # self.lu = nn.LeakyReLU(0.1, inplace=True)
+         # self.fc2 = nn.Linear(512, num_classes)
+
+         for m in self.modules():
+             if isinstance(m, nn.Conv2d):
+                 nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+             elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                 nn.init.constant_(m.weight, 1)
+                 nn.init.constant_(m.bias, 0)
+
+         # Zero-initialize the last BN in each residual branch,
+         # so that the residual branch starts with zeros, and each residual block behaves like an identity.
+         # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
+         if zero_init_residual:
+             for m in self.modules():
+                 if isinstance(m, Bottleneck) and m.bn3.weight is not None:
+                     nn.init.constant_(m.bn3.weight, 0)  # type: ignore[arg-type]
+                 elif isinstance(m, BasicBlock) and m.bn2.weight is not None:
+                     nn.init.constant_(m.bn2.weight, 0)  # type: ignore[arg-type]
+
+     def _make_layer(
+         self,
+         block: Type[Union[BasicBlock, Bottleneck]],
+         planes: int,
+         blocks: int,
+         stride: int = 1,
+         dilate: bool = False,
+     ) -> nn.Sequential:
+         norm_layer = self._norm_layer
+         downsample = None
+         previous_dilation = self.dilation
+         if dilate:
+             self.dilation *= stride
+             stride = 1
+         if stride != 1 or self.inplanes != planes * block.expansion:
+             downsample = nn.Sequential(
+                 conv1x1(self.inplanes, planes * block.expansion, stride),
+                 norm_layer(planes * block.expansion),
+             )
+
+         layers = []
+         layers.append(
+             block(
+                 self.inplanes, planes, stride, downsample, self.groups, self.base_width, previous_dilation, norm_layer
+             )
+         )
+         self.inplanes = planes * block.expansion
+         for _ in range(1, blocks):
+             layers.append(
+                 block(
+                     self.inplanes,
+                     planes,
+                     groups=self.groups,
+                     base_width=self.base_width,
+                     dilation=self.dilation,
+                     norm_layer=norm_layer,
+                 )
+             )
+
+         return nn.Sequential(*layers)
+
+     def _forward_impl(self, x: Tensor) -> Tensor:
+         # See note [TorchScript super()]
+         x = self.conv1(x)
+         x = self.bn1(x)
+         x = self.relu(x)
+         x = self.maxpool(x)
+
+         x = self.layer1(x)
+         x = self.layer2(x)
+         x = self.layer3(x)
+         x = self.layer4(x)
+
+         x = self.avgpool(x)
+         x = torch.flatten(x, 1)
+         out = self.fc(x)
+         # x = self.lu(self.fc1(x))
+         # out = self.fc2(x)
+         if self.show:
+             # When show=True the model also returns the pooled features alongside the logits.
+             return out, x
+         else:
+             return out
+
+     def forward(self, x: Tensor) -> Tensor:
+         return self._forward_impl(x)
+
+
+ def _resnet(
+     block: Type[Union[BasicBlock, Bottleneck]],
+     layers: List[int],
+     num_classes,
+     show,
+     **kwargs: Any,
+ ) -> ResNet:
+     model = ResNet(block, layers, num_classes, show, **kwargs)
+     return model
+
+
+ def resnet50(num_classes, show=False, **kwargs: Any) -> ResNet:
+     """ResNet-50 from `Deep Residual Learning for Image Recognition <https://arxiv.org/pdf/1512.03385.pdf>`__.
+
+     .. note::
+         The bottleneck of TorchVision places the stride for downsampling at the second 3x3
+         convolution while the original paper places it at the first 1x1 convolution.
+         This variant improves the accuracy and is known as `ResNet V1.5
+         <https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch>`_.
+
+     Args:
+         num_classes (int): number of output classes of the final fully connected layer.
+         show (bool, optional): if True, ``forward`` returns ``(logits, pooled_features)``
+             instead of the logits alone. Default is False.
+         **kwargs: parameters passed to the ``ResNet`` base class defined above. Please
+             refer to the `torchvision source code
+             <https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py>`_
+             for more details.
+     """
+     return _resnet(Bottleneck, [3, 4, 6, 3], num_classes, show, **kwargs)
+
+
+ class Clothing1M(Dataset):
+     """Single-image wrapper dataset: serves the uploaded image for inference.
+     The label is a dummy value (0) since no ground truth is available."""
+
+     def __init__(self, image, train=True, transform=None, target_transform=None, augment=False, mode='noisy'):
+         self.image = image
+         self.transform = transform
+         self.target_transform = target_transform
+         self.augment = augment
+         self.train = False  # inference only; the `train` argument is ignored
+         self.mode = mode
+
+         self.data = [self.image]
+
+     def __getitem__(self, index):
+         img, target = self.data[index], 0
+
+         # The image arrives as a numpy array from Gradio; convert it to a PIL Image.
+         img_origin = Image.fromarray(img).convert('RGB')
+
+         if self.transform is not None:
+             img = self.transform(img_origin)
+
+         if self.target_transform is not None:
+             target = self.target_transform(target)
+
+         return img, target
+
+     def __len__(self):
+         return len(self.data)
+
+ def set_seed(seed):
+     """Seed all RNGs and force deterministic cuDNN behavior for reproducibility."""
+     torch.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+     np.random.seed(seed)
+     random.seed(seed)
+     torch.backends.cudnn.deterministic = True
+     torch.backends.cudnn.benchmark = False
+
+ def preprocess_image(image):
+     # Unused placeholder; preprocessing happens in the dataset transform below.
+     pass
+
+ def classify_image(image):
+     # Training-time configuration carried over from the original script; only a few
+     # of these fields (seed, num_workers, resume, num_classes) are used at inference.
+     args = {
+         'overwrite': False,
+         'tqdm': 0,
+         'config_file': 'configs/clothing1m.yaml',
+         'dataset': 'clothing1M',
+         'root': './data',
+         'noise_type': 'clean',
+         'noise_rate': 0.0,
+         'save_dir': None,
+         'gpus': '0',
+         'num_workers': 8,
+         'grad_bound': 0.0,
+         'seed': 233,
+         'backbone': 'res50',
+         'optimizer': 'sgd',
+         'momentum': 0.9,
+         'nesterov': False,
+         'pretrained': True,
+         'ssl_pretrained': None,
+         'resume': model_path,
+         'lr': 0.01,
+         'scheduler': 'cos',
+         'milestones': None,
+         'gamma': None,
+         'weight_decay': 0.0001,
+         'batch_size': 128,
+         'start_epoch': None,
+         'epochs': 100,
+         'warmup': 0,
+         'ema': False,
+         'beta': 1.0,
+         'num_classes': 14,
+     }
+
+     device = 'cpu'
+     set_seed(args['seed'])
+
+     # Standard ImageNet normalization statistics.
+     MEAN = (0.485, 0.456, 0.406)
+     STD = (0.229, 0.224, 0.225)
+
+     test_loader = DataLoader(
+         dataset=Clothing1M(
+             image=image,
+             train=False,
+             transform=transforms.Compose([
+                 transforms.Resize(256),
+                 transforms.CenterCrop(224),
+                 transforms.ToTensor(),
+                 transforms.Normalize(MEAN, STD)]
+             )),
+         batch_size=256,
+         shuffle=False,
+         pin_memory=True,
+         num_workers=args['num_workers'])
+
+     model = resnet50(num_classes=args['num_classes'], show=True)
+     nFeat = 2048  # feature dimension of ResNet-50 (unused here)
+
+     # Initialize the backbone with ImageNet weights, dropping the 1000-way classifier head...
+     state_dict = ResNet50_Weights.IMAGENET1K_V2.get_state_dict(progress=True)
+     state_dict = {k: v for k, v in state_dict.items() if 'fc' not in k}
+     missing, unexpected = model.load_state_dict(state_dict, strict=False)
+     print('Loading ImageNet pretrained model')
+     print('Model missing keys:\n', missing)
+     print('Model unexpected keys:\n', unexpected)
+
+     # ...then overwrite with the fine-tuned checkpoint. The checkpoint stores the EMA
+     # copy of the weights under an 'ema_model.' prefix, which is stripped here.
+     checkpoint = torch.load(args['resume'], map_location=torch.device(device))
+     state_dict = checkpoint['model_state_dict']
+     for key in list(state_dict.keys()):
+         if 'ema_model' in key:
+             state_dict[key.replace('ema_model.', '')] = state_dict[key]
+             del state_dict[key]
+         else:
+             del state_dict[key]
+     model.load_state_dict(state_dict)
+     epoch = checkpoint['epoch']
+     if args['start_epoch'] is None:
+         args['start_epoch'] = epoch + 1
+
+     model = model.to(device)
+     model.eval()  # use running BatchNorm statistics for single-image inference
+
+     # The loader yields a single batch containing the one uploaded image.
+     loader_x, loader_y = None, None
+     for x, y in test_loader:
+         loader_x, loader_y = x.to(device), y.to(device)
+         break
+     with torch.no_grad():
+         z, _ = model(loader_x)
+     pred = torch.argmax(z, 1)
+     prediction_label = CATEGORY_NAMES[pred.item()]
+     return f'Predicted label: {prediction_label}'
+
+ # Example image queries (optional but recommended for demonstration).
+ # These files must exist in the Space repository.
+ example_image = "./examples/image_0.jpg"
+ example_image_2 = "./examples/image_7.jpg"
+
+ # Create the Gradio interface
+ interface = gr.Interface(
+     fn=classify_image,
+     inputs=gr.Image(),
+     outputs=gr.Text(),
+     examples=[example_image, example_image_2],
+ )
+
+ if __name__ == "__main__":
+     interface.launch()
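For a quick check outside the web UI, something like the following could be run from the Space's root directory (a sketch, not part of the commit; it assumes the example image exists locally, and the printed label is illustrative):

import numpy as np
from PIL import Image
from app import classify_image  # importing app also downloads the checkpoint

# classify_image expects a numpy RGB array, as Gradio's Image input provides.
img = np.array(Image.open("./examples/image_0.jpg").convert("RGB"))
print(classify_image(img))  # e.g. "Predicted label: T-Shirt"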