ginipick committed (verified)
Commit cda939c · 1 Parent(s): 1fd79c1

Update index.html

Files changed (1)
  1. index.html +37 -64
index.html CHANGED
@@ -1,9 +1,9 @@
  <!DOCTYPE html>
- <html lang="ko">
+ <html lang="en">
  <head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
- <title>카메라 상호작용 앱</title>
+ <title>Camera Interaction App</title>
  <style>
  body {
  font-family: sans-serif;
@@ -77,10 +77,10 @@
  color: white;
  }
  #startButton.start {
- background-color: #28a745; /* 녹색 */
+ background-color: #28a745; /* Green */
  }
  #startButton.stop {
- background-color: #dc3545; /* 빨간색 */
+ background-color: #dc3545; /* Red */
  }
  label {
  font-weight: bold;
@@ -96,18 +96,18 @@
  </style>
  </head>
  <body>
- <h1>카메라 상호작용 앱</h1>
+ <h1>Camera Interaction App</h1>

  <div id="videoContainer">
  <video id="videoFeed" autoplay playsinline></video>
- <div id="loadingOverlay">로딩 중...</div>
+ <div id="loadingOverlay">Loading...</div>
  </div>
  <canvas id="canvas" class="hidden"></canvas>
- <!-- 프레임 캡처용 -->
+ <!-- For capturing frames -->

  <div class="io-areas">
  <div>
- <label for="instructionText">지시사항:</label><br />
+ <label for="instructionText">Instruction:</label><br />
  <textarea
  id="instructionText"
  style="height: 2em; width: 40em"
@@ -115,28 +115,28 @@
  ></textarea>
  </div>
  <div>
- <label for="responseText">응답:</label><br />
+ <label for="responseText">Response:</label><br />
  <textarea
  id="responseText"
  style="height: 2em; width: 40em"
  name="Response"
  readonly
- placeholder="서버 응답이 여기에 표시됩니다..."
+ placeholder="Server response will appear here..."
  ></textarea>
  </div>
  </div>

  <div class="controls">
- <label for="intervalSelect">요청 간격:</label>
+ <label for="intervalSelect">Interval between 2 requests:</label>
  <select id="intervalSelect" name="Interval between 2 requests">
  <option value="0" selected>0ms</option>
  <option value="100">100ms</option>
  <option value="250">250ms</option>
  <option value="500">500ms</option>
- <option value="1000">1초</option>
- <option value="2000">2초</option>
+ <option value="1000">1s</option>
+ <option value="2000">2s</option>
  </select>
- <button id="startButton" class="start">시작</button>
+ <button id="startButton" class="start">Start</button>
  </div>

  <script type="module">
@@ -145,7 +145,6 @@
  AutoModelForVision2Seq,
  RawImage,
  } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers/dist/transformers.min.js";
-
  const video = document.getElementById("videoFeed");
  const canvas = document.getElementById("canvas");
  const instructionText = document.getElementById("instructionText");
@@ -153,20 +152,16 @@
  const intervalSelect = document.getElementById("intervalSelect");
  const startButton = document.getElementById("startButton");
  const loadingOverlay = document.getElementById("loadingOverlay");
-
- instructionText.value = "무엇이 보이나요? 한국어로 대답해주세요."; // 기본 지시사항
-
+ instructionText.value = "What do you see?"; // default instruction
  let stream;
  let isProcessing = false;
-
  let processor, model;
-
  async function initModel() {
- const modelId = "HuggingFaceTB/SmolVLM-500M-Instruct"; // 또는 "HuggingFaceTB/SmolVLM-Instruct";
+ const modelId = "HuggingFaceTB/SmolVLM-500M-Instruct"; // or "HuggingFaceTB/SmolVLM-Instruct";
  loadingOverlay.style.display = "flex";
- responseText.value = "프로세서 로딩 중...";
+ responseText.value = "Loading processor...";
  processor = await AutoProcessor.from_pretrained(modelId);
- responseText.value = "프로세서 로딩 완료. 모델 로딩 중...";
+ responseText.value = "Processor loaded. Loading model...";
  model = await AutoModelForVision2Seq.from_pretrained(modelId, {
  dtype: {
  embed_tokens: "fp16",
@@ -175,10 +170,9 @@
  },
  device: "webgpu",
  });
- responseText.value = "모델 로딩 완료. 카메라 초기화 중...";
+ responseText.value = "Model loaded. Initializing camera...";
  loadingOverlay.style.display = "none";
  }
-
  async function initCamera() {
  try {
  stream = await navigator.mediaDevices.getUserMedia({
@@ -186,19 +180,18 @@
  audio: false,
  });
  video.srcObject = stream;
- responseText.value = "카메라 접근 권한이 허용되었습니다. 시작할 준비가 되었습니다.";
+ responseText.value = "Camera access granted. Ready to start.";
  } catch (err) {
- console.error("카메라 접근 오류:", err);
- responseText.value = `카메라 접근 오류: ${err.name} - ${err.message}. 권한이 허용되었는지, HTTPS 또는 localhost에서 실행 중인지 확인하세요.`;
+ console.error("Error accessing camera:", err);
+ responseText.value = `Error accessing camera: ${err.name} - ${err.message}. Please ensure permissions are granted and you are on HTTPS or localhost.`;
  alert(
- `카메라 접근 오류: ${err.name}. 권한을 허용했는지, HTTPS 또는 localhost에서 실행 중인지 확인하세요.`
+ `Error accessing camera: ${err.name}. Make sure you've granted permission and are on HTTPS or localhost.`
  );
  }
  }
-
  function captureImage() {
  if (!stream || !video.videoWidth) {
- console.warn("캡처할 비디오 스트림이 준비되지 않았습니다.");
+ console.warn("Video stream not ready for capture.");
  return null;
  }
  canvas.width = video.videoWidth;
@@ -208,18 +201,11 @@
  const frame = context.getImageData(0, 0, canvas.width, canvas.height);
  return new RawImage(frame.data, frame.width, frame.height, 4);
  }
-
  async function runLocalVisionInference(imgElement, instruction) {
- // 지시사항에 한국어로 대답해달라는 문구가 없으면 추가
- let koreanInstruction = instruction;
- if (!instruction.includes("한국어") && !instruction.includes("Korean")) {
- koreanInstruction = instruction + " (한국어로 대답해주세요)";
- }
-
  const messages = [
  {
  role: "user",
- content: [{ type: "image" }, { type: "text", text: koreanInstruction }],
+ content: [{ type: "image" }, { type: "text", text: instruction }],
  },
  ];
  const text = processor.apply_chat_template(messages, {
@@ -238,13 +224,12 @@
  );
  return output[0].trim();
  }
-
  async function sendData() {
  if (!isProcessing) return;
  const instruction = instructionText.value;
  const rawImg = captureImage();
  if (!rawImg) {
- responseText.value = "캡처 실패";
+ responseText.value = "Capture failed";
  return;
  }
  try {
@@ -252,14 +237,12 @@
  responseText.value = reply;
  } catch (e) {
  console.error(e);
- responseText.value = `오류: ${e.message}`;
+ responseText.value = `Error: ${e.message}`;
  }
  }
-
  function sleep(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
  }
-
  async function processingLoop() {
  const intervalMs = parseInt(intervalSelect.value, 10);
  while (isProcessing) {
@@ -268,37 +251,30 @@
  await sleep(intervalMs);
  }
  }
-
  function handleStart() {
  if (!stream) {
- responseText.value = "카메라를 사용할 없습니다. 시작할 수 없습니다.";
- alert("카메라를 사용할 없습니다. 먼저 권한을 허용해주세요.");
+ responseText.value = "Camera not available. Cannot start.";
+ alert("Camera not available. Please grant permission first.");
  return;
  }
  isProcessing = true;
- startButton.textContent = "중지";
+ startButton.textContent = "Stop";
  startButton.classList.replace("start", "stop");
-
  instructionText.disabled = true;
  intervalSelect.disabled = true;
-
- responseText.value = "처리 시작...";
-
+ responseText.value = "Processing started...";
  processingLoop();
  }
-
  function handleStop() {
  isProcessing = false;
- startButton.textContent = "시작";
+ startButton.textContent = "Start";
  startButton.classList.replace("stop", "start");
-
  instructionText.disabled = false;
  intervalSelect.disabled = false;
- if (responseText.value.startsWith("처리 시작...")) {
- responseText.value = "처리 중지됨.";
+ if (responseText.value.startsWith("Processing started...")) {
+ responseText.value = "Processing stopped.";
  }
  }
-
  startButton.addEventListener("click", () => {
  if (isProcessing) {
  handleStop();
@@ -306,14 +282,13 @@
  handleStart();
  }
  });
-
  window.addEventListener("DOMContentLoaded", async () => {
- // WebGPU 지원 확인
+ // Check for WebGPU support
  if (!navigator.gpu) {
  const videoElement = document.getElementById("videoFeed");
  const warningElement = document.createElement("p");
  warningElement.textContent =
- " 브라우저에서는 WebGPU를 사용할 없습니다.";
+ "WebGPU is not available in this browser.";
  warningElement.style.color = "red";
  warningElement.style.textAlign = "center";
  videoElement.parentNode.insertBefore(
@@ -321,11 +296,9 @@
  videoElement.nextSibling
  );
  }
-
  await initModel();
  await initCamera();
  });
-
  window.addEventListener("beforeunload", () => {
  if (stream) {
  stream.getTracks().forEach((track) => track.stop());
@@ -333,4 +306,4 @@
  });
  </script>
  </body>
- </html>
+ </html>