chenyigen and akhaliq (HF Staff) committed
Commit 48d60dc · 0 Parent(s)

Duplicate from pytorch/SlowFast

Co-authored-by: Ahsen Khaliq <akhaliq@users.noreply.huggingface.co>

Files changed (4)
  1. .gitattributes +27 -0
  2. README.md +12 -0
  3. app.py +131 -0
  4. requirements.txt +4 -0
.gitattributes ADDED
@@ -0,0 +1,27 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,12 @@
+ ---
+ title: SlowFast
+ emoji: 🐠
+ colorFrom: blue
+ colorTo: red
+ sdk: gradio
+ app_file: app.py
+ pinned: false
+ duplicated_from: pytorch/SlowFast
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py ADDED
@@ -0,0 +1,131 @@
+ import json
+ import urllib.request
+
+ import gradio as gr
+ import torch
+ from torchvision.transforms import Compose, Lambda
+ from torchvision.transforms._transforms_video import (
+     CenterCropVideo,
+     NormalizeVideo,
+ )
+ from pytorchvideo.data.encoded_video import EncodedVideo
+ from pytorchvideo.transforms import (
+     ApplyTransformToKey,
+     ShortSideScale,
+     UniformTemporalSubsample,
+ )
+
+ # Load the pretrained `slowfast_r50` model
+ model = torch.hub.load('facebookresearch/pytorchvideo', 'slowfast_r50', pretrained=True)
+ # Set to GPU or CPU
+ device = "cpu"
+ model = model.eval()
+ model = model.to(device)
+ json_url = "https://dl.fbaipublicfiles.com/pyslowfast/dataset/class_names/kinetics_classnames.json"
+ json_filename = "kinetics_classnames.json"
+ urllib.request.urlretrieve(json_url, json_filename)
+ with open(json_filename, "r") as f:
+     kinetics_classnames = json.load(f)
+
+ # Create an id to label name mapping
+ kinetics_id_to_classname = {}
+ for k, v in kinetics_classnames.items():
+     kinetics_id_to_classname[v] = str(k).replace('"', "")
+ # Input-pipeline parameters expected by slowfast_r50
+ side_size = 256
+ mean = [0.45, 0.45, 0.45]
+ std = [0.225, 0.225, 0.225]
+ crop_size = 256
+ num_frames = 32
+ sampling_rate = 2
+ frames_per_second = 30
+ slowfast_alpha = 4
+ num_clips = 10  # test-time-augmentation settings; unused below
+ num_crops = 3
+
+ class PackPathway(torch.nn.Module):
+     """
+     Transform that packs the video frames into a
+     [slow_pathway, fast_pathway] list of tensors.
+     """
+     def __init__(self):
+         super().__init__()
+
+     def forward(self, frames: torch.Tensor):
+         fast_pathway = frames
+         # Temporally subsample the frames to build the slow pathway.
+         slow_pathway = torch.index_select(
+             frames,
+             1,
+             torch.linspace(
+                 0, frames.shape[1] - 1, frames.shape[1] // slowfast_alpha
+             ).long(),
+         )
+         frame_list = [slow_pathway, fast_pathway]
+         return frame_list
+
+ transform = ApplyTransformToKey(
+     key="video",
+     transform=Compose(
+         [
+             UniformTemporalSubsample(num_frames),
+             Lambda(lambda x: x / 255.0),
+             NormalizeVideo(mean, std),
+             ShortSideScale(
+                 size=side_size
+             ),
+             CenterCropVideo(crop_size),
+             PackPathway()
+         ]
+     ),
+ )
+
+ # Input clip duration is model-specific: (32 frames * sampling rate 2) / 30 fps ≈ 2.13 s
+ clip_duration = (num_frames * sampling_rate) / frames_per_second
+ url_link = "https://dl.fbaipublicfiles.com/pytorchvideo/projects/archery.mp4"
+ video_path = 'archery.mp4'
+ urllib.request.urlretrieve(url_link, video_path)
+
+ def inference(in_vid):
+     # start_sec should correspond to where the action occurs in the video
+     start_sec = 0
+     end_sec = start_sec + clip_duration
+
+     # Initialize an EncodedVideo helper class and load the video
+     video = EncodedVideo.from_path(in_vid)
+
+     # Load the desired clip
+     video_data = video.get_clip(start_sec=start_sec, end_sec=end_sec)
+
+     # Apply a transform to normalize the video input
+     video_data = transform(video_data)
+
+     # Move the inputs to the desired device
+     inputs = video_data["video"]
+     inputs = [i.to(device)[None, ...] for i in inputs]
+
+     # Pass the input clip through the model
+     with torch.no_grad():
+         preds = model(inputs)
+
+     # Get the predicted classes
+     post_act = torch.nn.Softmax(dim=1)
+     preds = post_act(preds)
+     pred_classes = preds.topk(k=5).indices[0]
+
+     # Map the predicted classes to the label names
+     pred_class_names = [kinetics_id_to_classname[int(i)] for i in pred_classes]
+     return ", ".join(pred_class_names)
+
+ inputs = gr.inputs.Video(label="Input Video")
+ outputs = gr.outputs.Textbox(label="Top 5 predicted labels")
+
+ title = "SlowFast"
+ description = "Demo for SlowFast networks pretrained on the Kinetics 400 dataset. To use it, simply upload a video or click the example to load it. Read more at the links below."
+ article = "<p style='text-align: center'><a href='https://arxiv.org/abs/1812.03982'>SlowFast Networks for Video Recognition</a> | <a href='https://github.com/facebookresearch/pytorchvideo'>GitHub Repo</a></p>"
+
+ examples = [
+     ['archery.mp4']
+ ]
+
+ gr.Interface(inference, inputs, outputs, title=title, description=description, article=article, examples=examples, analytics_enabled=False).launch(debug=True)
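
A quick way to see what PackPathway hands the model: with num_frames=32 and slowfast_alpha=4 as above, the fast pathway keeps all 32 frames while the slow pathway keeps 8 evenly spaced ones. A minimal sketch, illustrative only and using the same constants as app.py:

import torch

frames = torch.randn(3, 32, 256, 256)  # (C, T, H, W), as produced by the transform
indices = torch.linspace(0, frames.shape[1] - 1, frames.shape[1] // 4).long()
slow_pathway = torch.index_select(frames, 1, indices)
print(slow_pathway.shape)  # torch.Size([3, 8, 256, 256])
print(frames.shape)        # torch.Size([3, 32, 256, 256])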
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ av
+ fvcore
+ torch
+ torchvision
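
gradio itself is not pinned here; a Space with `sdk: gradio` supplies it at build time. As a local sanity check, a minimal sketch (assuming these packages plus pytorchvideo resolve in your environment) that exercises the hub model's two-pathway input directly, without the UI:

import torch

# slowfast_r50 with alpha=4 expects a [slow, fast] pair shaped (N, C, T, H, W)
model = torch.hub.load('facebookresearch/pytorchvideo', 'slowfast_r50', pretrained=True).eval()
slow = torch.randn(1, 3, 8, 256, 256)   # 8 temporally subsampled frames
fast = torch.randn(1, 3, 32, 256, 256)  # all 32 frames
with torch.no_grad():
    preds = model([slow, fast])
print(preds.shape)  # torch.Size([1, 400]), one score per Kinetics 400 class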