ginipick committed on
Commit
1fd79c1
·
verified ·
1 Parent(s): 909a8d4

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +43 -37
index.html CHANGED
@@ -1,9 +1,9 @@
1
  <!DOCTYPE html>
2
- <html lang="en">
3
  <head>
4
  <meta charset="UTF-8" />
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
- <title>Camera Interaction App</title>
7
  <style>
8
  body {
9
  font-family: sans-serif;
@@ -77,10 +77,10 @@
77
  color: white;
78
  }
79
  #startButton.start {
80
- background-color: #28a745; /* Green */
81
  }
82
  #startButton.stop {
83
- background-color: #dc3545; /* Red */
84
  }
85
  label {
86
  font-weight: bold;
@@ -96,18 +96,18 @@
96
  </style>
97
  </head>
98
  <body>
99
- <h1>Camera Interaction App</h1>
100
 
101
  <div id="videoContainer">
102
  <video id="videoFeed" autoplay playsinline></video>
103
- <div id="loadingOverlay">Loading...</div>
104
  </div>
105
  <canvas id="canvas" class="hidden"></canvas>
106
- <!-- For capturing frames -->
107
 
108
  <div class="io-areas">
109
  <div>
110
- <label for="instructionText">Instruction:</label><br />
111
  <textarea
112
  id="instructionText"
113
  style="height: 2em; width: 40em"
@@ -115,28 +115,28 @@
115
  ></textarea>
116
  </div>
117
  <div>
118
- <label for="responseText">Response:</label><br />
119
  <textarea
120
  id="responseText"
121
  style="height: 2em; width: 40em"
122
  name="Response"
123
  readonly
124
- placeholder="Server response will appear here..."
125
  ></textarea>
126
  </div>
127
  </div>
128
 
129
  <div class="controls">
130
- <label for="intervalSelect">Interval between 2 requests:</label>
131
  <select id="intervalSelect" name="Interval between 2 requests">
132
  <option value="0" selected>0ms</option>
133
  <option value="100">100ms</option>
134
  <option value="250">250ms</option>
135
  <option value="500">500ms</option>
136
- <option value="1000">1s</option>
137
- <option value="2000">2s</option>
138
  </select>
139
- <button id="startButton" class="start">Start</button>
140
  </div>
141
 
142
  <script type="module">
@@ -154,7 +154,7 @@
154
  const startButton = document.getElementById("startButton");
155
  const loadingOverlay = document.getElementById("loadingOverlay");
156
 
157
- instructionText.value = "What do you see?"; // default instruction
158
 
159
  let stream;
160
  let isProcessing = false;
@@ -162,11 +162,11 @@
162
  let processor, model;
163
 
164
  async function initModel() {
165
- const modelId = "HuggingFaceTB/SmolVLM-500M-Instruct"; // or "HuggingFaceTB/SmolVLM-Instruct";
166
  loadingOverlay.style.display = "flex";
167
- responseText.value = "Loading processor...";
168
  processor = await AutoProcessor.from_pretrained(modelId);
169
- responseText.value = "Processor loaded. Loading model...";
170
  model = await AutoModelForVision2Seq.from_pretrained(modelId, {
171
  dtype: {
172
  embed_tokens: "fp16",
@@ -175,7 +175,7 @@
175
  },
176
  device: "webgpu",
177
  });
178
- responseText.value = "Model loaded. Initializing camera...";
179
  loadingOverlay.style.display = "none";
180
  }
181
 
@@ -186,19 +186,19 @@
186
  audio: false,
187
  });
188
  video.srcObject = stream;
189
- responseText.value = "Camera access granted. Ready to start.";
190
  } catch (err) {
191
- console.error("Error accessing camera:", err);
192
- responseText.value = `Error accessing camera: ${err.name} - ${err.message}. Please ensure permissions are granted and you are on HTTPS or localhost.`;
193
  alert(
194
- `Error accessing camera: ${err.name}. Make sure you've granted permission and are on HTTPS or localhost.`
195
  );
196
  }
197
  }
198
 
199
  function captureImage() {
200
  if (!stream || !video.videoWidth) {
201
- console.warn("Video stream not ready for capture.");
202
  return null;
203
  }
204
  canvas.width = video.videoWidth;
@@ -210,10 +210,16 @@
210
  }
211
 
212
  async function runLocalVisionInference(imgElement, instruction) {
 
 
 
 
 
 
213
  const messages = [
214
  {
215
  role: "user",
216
- content: [{ type: "image" }, { type: "text", text: instruction }],
217
  },
218
  ];
219
  const text = processor.apply_chat_template(messages, {
@@ -238,7 +244,7 @@
238
  const instruction = instructionText.value;
239
  const rawImg = captureImage();
240
  if (!rawImg) {
241
- responseText.value = "Capture failed";
242
  return;
243
  }
244
  try {
@@ -246,7 +252,7 @@
246
  responseText.value = reply;
247
  } catch (e) {
248
  console.error(e);
249
- responseText.value = `Error: ${e.message}`;
250
  }
251
  }
252
 
@@ -265,31 +271,31 @@
265
 
266
  function handleStart() {
267
  if (!stream) {
268
- responseText.value = "Camera not available. Cannot start.";
269
- alert("Camera not available. Please grant permission first.");
270
  return;
271
  }
272
  isProcessing = true;
273
- startButton.textContent = "Stop";
274
  startButton.classList.replace("start", "stop");
275
 
276
  instructionText.disabled = true;
277
  intervalSelect.disabled = true;
278
 
279
- responseText.value = "Processing started...";
280
 
281
  processingLoop();
282
  }
283
 
284
  function handleStop() {
285
  isProcessing = false;
286
- startButton.textContent = "Start";
287
  startButton.classList.replace("stop", "start");
288
 
289
  instructionText.disabled = false;
290
  intervalSelect.disabled = false;
291
- if (responseText.value.startsWith("Processing started...")) {
292
- responseText.value = "Processing stopped.";
293
  }
294
  }
295
 
@@ -302,12 +308,12 @@
302
  });
303
 
304
  window.addEventListener("DOMContentLoaded", async () => {
305
- // Check for WebGPU support
306
  if (!navigator.gpu) {
307
  const videoElement = document.getElementById("videoFeed");
308
  const warningElement = document.createElement("p");
309
  warningElement.textContent =
310
- "WebGPU is not available in this browser.";
311
  warningElement.style.color = "red";
312
  warningElement.style.textAlign = "center";
313
  videoElement.parentNode.insertBefore(
@@ -327,4 +333,4 @@
327
  });
328
  </script>
329
  </body>
330
- </html>
 
1
  <!DOCTYPE html>
2
+ <html lang="ko">
3
  <head>
4
  <meta charset="UTF-8" />
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>카메라 상호작용 앱</title>
7
  <style>
8
  body {
9
  font-family: sans-serif;
 
77
  color: white;
78
  }
79
  #startButton.start {
80
+ background-color: #28a745; /* 녹색 */
81
  }
82
  #startButton.stop {
83
+ background-color: #dc3545; /* 빨간색 */
84
  }
85
  label {
86
  font-weight: bold;
 
96
  </style>
97
  </head>
98
  <body>
99
+ <h1>카메라 상호작용 앱</h1>
100
 
101
  <div id="videoContainer">
102
  <video id="videoFeed" autoplay playsinline></video>
103
+ <div id="loadingOverlay">로딩 중...</div>
104
  </div>
105
  <canvas id="canvas" class="hidden"></canvas>
106
+ <!-- 프레임 캡처용 -->
107
 
108
  <div class="io-areas">
109
  <div>
110
+ <label for="instructionText">지시사항:</label><br />
111
  <textarea
112
  id="instructionText"
113
  style="height: 2em; width: 40em"
 
115
  ></textarea>
116
  </div>
117
  <div>
118
+ <label for="responseText">응답:</label><br />
119
  <textarea
120
  id="responseText"
121
  style="height: 2em; width: 40em"
122
  name="Response"
123
  readonly
124
+ placeholder="서버 응답이 여기에 표시됩니다..."
125
  ></textarea>
126
  </div>
127
  </div>
128
 
129
  <div class="controls">
130
+ <label for="intervalSelect">요청 간격:</label>
131
  <select id="intervalSelect" name="Interval between 2 requests">
132
  <option value="0" selected>0ms</option>
133
  <option value="100">100ms</option>
134
  <option value="250">250ms</option>
135
  <option value="500">500ms</option>
136
+ <option value="1000">1초</option>
137
+ <option value="2000">2초</option>
138
  </select>
139
+ <button id="startButton" class="start">시작</button>
140
  </div>
141
 
142
  <script type="module">
 
154
  const startButton = document.getElementById("startButton");
155
  const loadingOverlay = document.getElementById("loadingOverlay");
156
 
157
+ instructionText.value = "무엇이 보이나요? 한국어로 대답해주세요."; // 기본 지시사항
158
 
159
  let stream;
160
  let isProcessing = false;
 
162
  let processor, model;
163
 
164
  async function initModel() {
165
+ const modelId = "HuggingFaceTB/SmolVLM-500M-Instruct"; // 또는 "HuggingFaceTB/SmolVLM-Instruct";
166
  loadingOverlay.style.display = "flex";
167
+ responseText.value = "프로세서 로딩 중...";
168
  processor = await AutoProcessor.from_pretrained(modelId);
169
+ responseText.value = "프로세서 로딩 완료. 모델 로딩 중...";
170
  model = await AutoModelForVision2Seq.from_pretrained(modelId, {
171
  dtype: {
172
  embed_tokens: "fp16",
 
175
  },
176
  device: "webgpu",
177
  });
178
+ responseText.value = "모델 로딩 완료. 카메라 초기화 중...";
179
  loadingOverlay.style.display = "none";
180
  }
181
 
 
186
  audio: false,
187
  });
188
  video.srcObject = stream;
189
+ responseText.value = "카메라 접근 권한이 허용되었습니다. 시작할 준비가 되었습니다.";
190
  } catch (err) {
191
+ console.error("카메라 접근 오류:", err);
192
+ responseText.value = `카메라 접근 오류: ${err.name} - ${err.message}. 권한이 허용되었는지, HTTPS 또는 localhost에서 실행 중인지 확인하세요.`;
193
  alert(
194
+ `카메라 접근 오류: ${err.name}. 권한을 허용했는지, HTTPS 또는 localhost에서 실행 중인지 확인하세요.`
195
  );
196
  }
197
  }
198
 
199
  function captureImage() {
200
  if (!stream || !video.videoWidth) {
201
+ console.warn("캡처할 비디오 스트림이 준비되지 않았습니다.");
202
  return null;
203
  }
204
  canvas.width = video.videoWidth;
 
210
  }
211
 
212
  async function runLocalVisionInference(imgElement, instruction) {
213
+ // 지시사항에 한국어로 대답해달라는 문구가 없으면 추가
214
+ let koreanInstruction = instruction;
215
+ if (!instruction.includes("한국어") && !instruction.includes("Korean")) {
216
+ koreanInstruction = instruction + " (한국어로 대답해주세요)";
217
+ }
218
+
219
  const messages = [
220
  {
221
  role: "user",
222
+ content: [{ type: "image" }, { type: "text", text: koreanInstruction }],
223
  },
224
  ];
225
  const text = processor.apply_chat_template(messages, {
 
244
  const instruction = instructionText.value;
245
  const rawImg = captureImage();
246
  if (!rawImg) {
247
+ responseText.value = "캡처 실패";
248
  return;
249
  }
250
  try {
 
252
  responseText.value = reply;
253
  } catch (e) {
254
  console.error(e);
255
+ responseText.value = `오류: ${e.message}`;
256
  }
257
  }
258
 
 
271
 
272
  function handleStart() {
273
  if (!stream) {
274
+ responseText.value = "카메라를 사용할 수 없습니다. 시작할 수 없습니다.";
275
+ alert("카메라를 사용할 수 없습니다. 먼저 권한을 허용해주세요.");
276
  return;
277
  }
278
  isProcessing = true;
279
+ startButton.textContent = "중지";
280
  startButton.classList.replace("start", "stop");
281
 
282
  instructionText.disabled = true;
283
  intervalSelect.disabled = true;
284
 
285
+ responseText.value = "처리 시작...";
286
 
287
  processingLoop();
288
  }
289
 
290
  function handleStop() {
291
  isProcessing = false;
292
+ startButton.textContent = "시작";
293
  startButton.classList.replace("stop", "start");
294
 
295
  instructionText.disabled = false;
296
  intervalSelect.disabled = false;
297
+ if (responseText.value.startsWith("처리 시작...")) {
298
+ responseText.value = "처리 중지됨.";
299
  }
300
  }
301
 
 
308
  });
309
 
310
  window.addEventListener("DOMContentLoaded", async () => {
311
+ // WebGPU 지원 확인
312
  if (!navigator.gpu) {
313
  const videoElement = document.getElementById("videoFeed");
314
  const warningElement = document.createElement("p");
315
  warningElement.textContent =
316
+ "이 브라우저에서는 WebGPU를 사용할 수 없습니다.";
317
  warningElement.style.color = "red";
318
  warningElement.style.textAlign = "center";
319
  videoElement.parentNode.insertBefore(
 
333
  });
334
  </script>
335
  </body>
336
+ </html>