Update index.html
Browse files- index.html +37 -64
index.html
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
<!DOCTYPE html>
|
2 |
-
<html lang="
|
3 |
<head>
|
4 |
<meta charset="UTF-8" />
|
5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
6 |
-
<title
|
7 |
<style>
|
8 |
body {
|
9 |
font-family: sans-serif;
|
@@ -77,10 +77,10 @@
|
|
77 |
color: white;
|
78 |
}
|
79 |
#startButton.start {
|
80 |
-
background-color: #28a745; /*
|
81 |
}
|
82 |
#startButton.stop {
|
83 |
-
background-color: #dc3545; /*
|
84 |
}
|
85 |
label {
|
86 |
font-weight: bold;
|
@@ -96,18 +96,18 @@
|
|
96 |
</style>
|
97 |
</head>
|
98 |
<body>
|
99 |
-
<h1
|
100 |
|
101 |
<div id="videoContainer">
|
102 |
<video id="videoFeed" autoplay playsinline></video>
|
103 |
-
<div id="loadingOverlay"
|
104 |
</div>
|
105 |
<canvas id="canvas" class="hidden"></canvas>
|
106 |
-
<!--
|
107 |
|
108 |
<div class="io-areas">
|
109 |
<div>
|
110 |
-
<label for="instructionText"
|
111 |
<textarea
|
112 |
id="instructionText"
|
113 |
style="height: 2em; width: 40em"
|
@@ -115,28 +115,28 @@
|
|
115 |
></textarea>
|
116 |
</div>
|
117 |
<div>
|
118 |
-
<label for="responseText"
|
119 |
<textarea
|
120 |
id="responseText"
|
121 |
style="height: 2em; width: 40em"
|
122 |
name="Response"
|
123 |
readonly
|
124 |
-
placeholder="
|
125 |
></textarea>
|
126 |
</div>
|
127 |
</div>
|
128 |
|
129 |
<div class="controls">
|
130 |
-
<label for="intervalSelect"
|
131 |
<select id="intervalSelect" name="Interval between 2 requests">
|
132 |
<option value="0" selected>0ms</option>
|
133 |
<option value="100">100ms</option>
|
134 |
<option value="250">250ms</option>
|
135 |
<option value="500">500ms</option>
|
136 |
-
<option value="1000">
|
137 |
-
<option value="2000">
|
138 |
</select>
|
139 |
-
<button id="startButton" class="start"
|
140 |
</div>
|
141 |
|
142 |
<script type="module">
|
@@ -145,7 +145,6 @@
|
|
145 |
AutoModelForVision2Seq,
|
146 |
RawImage,
|
147 |
} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers/dist/transformers.min.js";
|
148 |
-
|
149 |
const video = document.getElementById("videoFeed");
|
150 |
const canvas = document.getElementById("canvas");
|
151 |
const instructionText = document.getElementById("instructionText");
|
@@ -153,20 +152,16 @@
|
|
153 |
const intervalSelect = document.getElementById("intervalSelect");
|
154 |
const startButton = document.getElementById("startButton");
|
155 |
const loadingOverlay = document.getElementById("loadingOverlay");
|
156 |
-
|
157 |
-
instructionText.value = "무엇이 보이나요? 한국어로 대답해주세요."; // 기본 지시사항
|
158 |
-
|
159 |
let stream;
|
160 |
let isProcessing = false;
|
161 |
-
|
162 |
let processor, model;
|
163 |
-
|
164 |
async function initModel() {
|
165 |
-
const modelId = "HuggingFaceTB/SmolVLM-500M-Instruct"; //
|
166 |
loadingOverlay.style.display = "flex";
|
167 |
-
responseText.value = "
|
168 |
processor = await AutoProcessor.from_pretrained(modelId);
|
169 |
-
responseText.value = "
|
170 |
model = await AutoModelForVision2Seq.from_pretrained(modelId, {
|
171 |
dtype: {
|
172 |
embed_tokens: "fp16",
|
@@ -175,10 +170,9 @@
|
|
175 |
},
|
176 |
device: "webgpu",
|
177 |
});
|
178 |
-
responseText.value = "
|
179 |
loadingOverlay.style.display = "none";
|
180 |
}
|
181 |
-
|
182 |
async function initCamera() {
|
183 |
try {
|
184 |
stream = await navigator.mediaDevices.getUserMedia({
|
@@ -186,19 +180,18 @@
|
|
186 |
audio: false,
|
187 |
});
|
188 |
video.srcObject = stream;
|
189 |
-
responseText.value = "
|
190 |
} catch (err) {
|
191 |
-
console.error("
|
192 |
-
responseText.value =
|
193 |
alert(
|
194 |
-
|
195 |
);
|
196 |
}
|
197 |
}
|
198 |
-
|
199 |
function captureImage() {
|
200 |
if (!stream || !video.videoWidth) {
|
201 |
-
console.warn("
|
202 |
return null;
|
203 |
}
|
204 |
canvas.width = video.videoWidth;
|
@@ -208,18 +201,11 @@
|
|
208 |
const frame = context.getImageData(0, 0, canvas.width, canvas.height);
|
209 |
return new RawImage(frame.data, frame.width, frame.height, 4);
|
210 |
}
|
211 |
-
|
212 |
async function runLocalVisionInference(imgElement, instruction) {
|
213 |
-
// 지시사항에 한국어로 대답해달라는 문구가 없으면 추가
|
214 |
-
let koreanInstruction = instruction;
|
215 |
-
if (!instruction.includes("한국어") && !instruction.includes("Korean")) {
|
216 |
-
koreanInstruction = instruction + " (한국어로 대답해주세요)";
|
217 |
-
}
|
218 |
-
|
219 |
const messages = [
|
220 |
{
|
221 |
role: "user",
|
222 |
-
content: [{ type: "image" }, { type: "text", text:
|
223 |
},
|
224 |
];
|
225 |
const text = processor.apply_chat_template(messages, {
|
@@ -238,13 +224,12 @@
|
|
238 |
);
|
239 |
return output[0].trim();
|
240 |
}
|
241 |
-
|
242 |
async function sendData() {
|
243 |
if (!isProcessing) return;
|
244 |
const instruction = instructionText.value;
|
245 |
const rawImg = captureImage();
|
246 |
if (!rawImg) {
|
247 |
-
responseText.value = "
|
248 |
return;
|
249 |
}
|
250 |
try {
|
@@ -252,14 +237,12 @@
|
|
252 |
responseText.value = reply;
|
253 |
} catch (e) {
|
254 |
console.error(e);
|
255 |
-
responseText.value =
|
256 |
}
|
257 |
}
|
258 |
-
|
259 |
function sleep(ms) {
|
260 |
return new Promise((resolve) => setTimeout(resolve, ms));
|
261 |
}
|
262 |
-
|
263 |
async function processingLoop() {
|
264 |
const intervalMs = parseInt(intervalSelect.value, 10);
|
265 |
while (isProcessing) {
|
@@ -268,37 +251,30 @@
|
|
268 |
await sleep(intervalMs);
|
269 |
}
|
270 |
}
|
271 |
-
|
272 |
function handleStart() {
|
273 |
if (!stream) {
|
274 |
-
responseText.value = "
|
275 |
-
alert("
|
276 |
return;
|
277 |
}
|
278 |
isProcessing = true;
|
279 |
-
startButton.textContent = "
|
280 |
startButton.classList.replace("start", "stop");
|
281 |
-
|
282 |
instructionText.disabled = true;
|
283 |
intervalSelect.disabled = true;
|
284 |
-
|
285 |
-
responseText.value = "처리 시작...";
|
286 |
-
|
287 |
processingLoop();
|
288 |
}
|
289 |
-
|
290 |
function handleStop() {
|
291 |
isProcessing = false;
|
292 |
-
startButton.textContent = "
|
293 |
startButton.classList.replace("stop", "start");
|
294 |
-
|
295 |
instructionText.disabled = false;
|
296 |
intervalSelect.disabled = false;
|
297 |
-
if (responseText.value.startsWith("
|
298 |
-
responseText.value = "
|
299 |
}
|
300 |
}
|
301 |
-
|
302 |
startButton.addEventListener("click", () => {
|
303 |
if (isProcessing) {
|
304 |
handleStop();
|
@@ -306,14 +282,13 @@
|
|
306 |
handleStart();
|
307 |
}
|
308 |
});
|
309 |
-
|
310 |
window.addEventListener("DOMContentLoaded", async () => {
|
311 |
-
// WebGPU
|
312 |
if (!navigator.gpu) {
|
313 |
const videoElement = document.getElementById("videoFeed");
|
314 |
const warningElement = document.createElement("p");
|
315 |
warningElement.textContent =
|
316 |
-
"
|
317 |
warningElement.style.color = "red";
|
318 |
warningElement.style.textAlign = "center";
|
319 |
videoElement.parentNode.insertBefore(
|
@@ -321,11 +296,9 @@
|
|
321 |
videoElement.nextSibling
|
322 |
);
|
323 |
}
|
324 |
-
|
325 |
await initModel();
|
326 |
await initCamera();
|
327 |
});
|
328 |
-
|
329 |
window.addEventListener("beforeunload", () => {
|
330 |
if (stream) {
|
331 |
stream.getTracks().forEach((track) => track.stop());
|
@@ -333,4 +306,4 @@
|
|
333 |
});
|
334 |
</script>
|
335 |
</body>
|
336 |
-
</html>
|
|
|
1 |
<!DOCTYPE html>
|
2 |
+
<html lang="en">
|
3 |
<head>
|
4 |
<meta charset="UTF-8" />
|
5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
6 |
+
<title>Camera Interaction App</title>
|
7 |
<style>
|
8 |
body {
|
9 |
font-family: sans-serif;
|
|
|
77 |
color: white;
|
78 |
}
|
79 |
#startButton.start {
|
80 |
+
background-color: #28a745; /* Green */
|
81 |
}
|
82 |
#startButton.stop {
|
83 |
+
background-color: #dc3545; /* Red */
|
84 |
}
|
85 |
label {
|
86 |
font-weight: bold;
|
|
|
96 |
</style>
|
97 |
</head>
|
98 |
<body>
|
99 |
+
<h1>Camera Interaction App</h1>
|
100 |
|
101 |
<div id="videoContainer">
|
102 |
<video id="videoFeed" autoplay playsinline></video>
|
103 |
+
<div id="loadingOverlay">Loading...</div>
|
104 |
</div>
|
105 |
<canvas id="canvas" class="hidden"></canvas>
|
106 |
+
<!-- For capturing frames -->
|
107 |
|
108 |
<div class="io-areas">
|
109 |
<div>
|
110 |
+
<label for="instructionText">Instruction:</label><br />
|
111 |
<textarea
|
112 |
id="instructionText"
|
113 |
style="height: 2em; width: 40em"
|
|
|
115 |
></textarea>
|
116 |
</div>
|
117 |
<div>
|
118 |
+
<label for="responseText">Response:</label><br />
|
119 |
<textarea
|
120 |
id="responseText"
|
121 |
style="height: 2em; width: 40em"
|
122 |
name="Response"
|
123 |
readonly
|
124 |
+
placeholder="Server response will appear here..."
|
125 |
></textarea>
|
126 |
</div>
|
127 |
</div>
|
128 |
|
129 |
<div class="controls">
|
130 |
+
<label for="intervalSelect">Interval between 2 requests:</label>
|
131 |
<select id="intervalSelect" name="Interval between 2 requests">
|
132 |
<option value="0" selected>0ms</option>
|
133 |
<option value="100">100ms</option>
|
134 |
<option value="250">250ms</option>
|
135 |
<option value="500">500ms</option>
|
136 |
+
<option value="1000">1s</option>
|
137 |
+
<option value="2000">2s</option>
|
138 |
</select>
|
139 |
+
<button id="startButton" class="start">Start</button>
|
140 |
</div>
|
141 |
|
142 |
<script type="module">
|
|
|
145 |
AutoModelForVision2Seq,
|
146 |
RawImage,
|
147 |
} from "https://cdn.jsdelivr.net/npm/@huggingface/transformers/dist/transformers.min.js";
|
|
|
148 |
const video = document.getElementById("videoFeed");
|
149 |
const canvas = document.getElementById("canvas");
|
150 |
const instructionText = document.getElementById("instructionText");
|
|
|
152 |
const intervalSelect = document.getElementById("intervalSelect");
|
153 |
const startButton = document.getElementById("startButton");
|
154 |
const loadingOverlay = document.getElementById("loadingOverlay");
|
155 |
+
instructionText.value = "What do you see?"; // default instruction
|
|
|
|
|
156 |
let stream;
|
157 |
let isProcessing = false;
|
|
|
158 |
let processor, model;
|
|
|
159 |
async function initModel() {
|
160 |
+
const modelId = "HuggingFaceTB/SmolVLM-500M-Instruct"; // or "HuggingFaceTB/SmolVLM-Instruct";
|
161 |
loadingOverlay.style.display = "flex";
|
162 |
+
responseText.value = "Loading processor...";
|
163 |
processor = await AutoProcessor.from_pretrained(modelId);
|
164 |
+
responseText.value = "Processor loaded. Loading model...";
|
165 |
model = await AutoModelForVision2Seq.from_pretrained(modelId, {
|
166 |
dtype: {
|
167 |
embed_tokens: "fp16",
|
|
|
170 |
},
|
171 |
device: "webgpu",
|
172 |
});
|
173 |
+
responseText.value = "Model loaded. Initializing camera...";
|
174 |
loadingOverlay.style.display = "none";
|
175 |
}
|
|
|
176 |
async function initCamera() {
|
177 |
try {
|
178 |
stream = await navigator.mediaDevices.getUserMedia({
|
|
|
180 |
audio: false,
|
181 |
});
|
182 |
video.srcObject = stream;
|
183 |
+
responseText.value = "Camera access granted. Ready to start.";
|
184 |
} catch (err) {
|
185 |
+
console.error("Error accessing camera:", err);
|
186 |
+
responseText.value = `Error accessing camera: ${err.name} - ${err.message}. Please ensure permissions are granted and you are on HTTPS or localhost.`;
|
187 |
alert(
|
188 |
+
`Error accessing camera: ${err.name}. Make sure you've granted permission and are on HTTPS or localhost.`
|
189 |
);
|
190 |
}
|
191 |
}
|
|
|
192 |
function captureImage() {
|
193 |
if (!stream || !video.videoWidth) {
|
194 |
+
console.warn("Video stream not ready for capture.");
|
195 |
return null;
|
196 |
}
|
197 |
canvas.width = video.videoWidth;
|
|
|
201 |
const frame = context.getImageData(0, 0, canvas.width, canvas.height);
|
202 |
return new RawImage(frame.data, frame.width, frame.height, 4);
|
203 |
}
|
|
|
204 |
async function runLocalVisionInference(imgElement, instruction) {
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
const messages = [
|
206 |
{
|
207 |
role: "user",
|
208 |
+
content: [{ type: "image" }, { type: "text", text: instruction }],
|
209 |
},
|
210 |
];
|
211 |
const text = processor.apply_chat_template(messages, {
|
|
|
224 |
);
|
225 |
return output[0].trim();
|
226 |
}
|
|
|
227 |
async function sendData() {
|
228 |
if (!isProcessing) return;
|
229 |
const instruction = instructionText.value;
|
230 |
const rawImg = captureImage();
|
231 |
if (!rawImg) {
|
232 |
+
responseText.value = "Capture failed";
|
233 |
return;
|
234 |
}
|
235 |
try {
|
|
|
237 |
responseText.value = reply;
|
238 |
} catch (e) {
|
239 |
console.error(e);
|
240 |
+
responseText.value = `Error: ${e.message}`;
|
241 |
}
|
242 |
}
|
|
|
243 |
function sleep(ms) {
|
244 |
return new Promise((resolve) => setTimeout(resolve, ms));
|
245 |
}
|
|
|
246 |
async function processingLoop() {
|
247 |
const intervalMs = parseInt(intervalSelect.value, 10);
|
248 |
while (isProcessing) {
|
|
|
251 |
await sleep(intervalMs);
|
252 |
}
|
253 |
}
|
|
|
254 |
function handleStart() {
|
255 |
if (!stream) {
|
256 |
+
responseText.value = "Camera not available. Cannot start.";
|
257 |
+
alert("Camera not available. Please grant permission first.");
|
258 |
return;
|
259 |
}
|
260 |
isProcessing = true;
|
261 |
+
startButton.textContent = "Stop";
|
262 |
startButton.classList.replace("start", "stop");
|
|
|
263 |
instructionText.disabled = true;
|
264 |
intervalSelect.disabled = true;
|
265 |
+
responseText.value = "Processing started...";
|
|
|
|
|
266 |
processingLoop();
|
267 |
}
|
|
|
268 |
function handleStop() {
|
269 |
isProcessing = false;
|
270 |
+
startButton.textContent = "Start";
|
271 |
startButton.classList.replace("stop", "start");
|
|
|
272 |
instructionText.disabled = false;
|
273 |
intervalSelect.disabled = false;
|
274 |
+
if (responseText.value.startsWith("Processing started...")) {
|
275 |
+
responseText.value = "Processing stopped.";
|
276 |
}
|
277 |
}
|
|
|
278 |
startButton.addEventListener("click", () => {
|
279 |
if (isProcessing) {
|
280 |
handleStop();
|
|
|
282 |
handleStart();
|
283 |
}
|
284 |
});
|
|
|
285 |
window.addEventListener("DOMContentLoaded", async () => {
|
286 |
+
// Check for WebGPU support
|
287 |
if (!navigator.gpu) {
|
288 |
const videoElement = document.getElementById("videoFeed");
|
289 |
const warningElement = document.createElement("p");
|
290 |
warningElement.textContent =
|
291 |
+
"WebGPU is not available in this browser.";
|
292 |
warningElement.style.color = "red";
|
293 |
warningElement.style.textAlign = "center";
|
294 |
videoElement.parentNode.insertBefore(
|
|
|
296 |
videoElement.nextSibling
|
297 |
);
|
298 |
}
|
|
|
299 |
await initModel();
|
300 |
await initCamera();
|
301 |
});
|
|
|
302 |
window.addEventListener("beforeunload", () => {
|
303 |
if (stream) {
|
304 |
stream.getTracks().forEach((track) => track.stop());
|
|
|
306 |
});
|
307 |
</script>
|
308 |
</body>
|
309 |
+
</html>
|