BK-V committed on
Commit
e75589a
·
1 Parent(s): 1049c26

change ui style to be more modern

Browse files
Files changed (1) hide show
  1. index.html +382 -409
index.html CHANGED
@@ -4,497 +4,470 @@
4
  <meta charset="UTF-8" />
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
  <title>Camera Interaction App</title>
7
- <!-- Add Vazirmatn Persian font from Google Fonts -->
8
- <link href="https://fonts.googleapis.com/css2?family=Vazirmatn:wght@400;600;700&display=swap" rel="stylesheet">
 
 
 
 
 
 
9
  <style>
10
  :root {
11
- --primary: #2563eb;
12
- --primary-dark: #1e40af;
13
- --danger: #dc2626;
14
- --success: #16a34a;
15
- --bg: #f8fafc;
16
- --surface: #ffffff;
17
- --border: #e5e7eb;
18
- --shadow: 0 4px 24px 0 rgba(30, 41, 59, 0.08);
19
- --radius: 14px;
20
- --font: 'Vazirmatn', 'Inter', 'Segoe UI', Arial, sans-serif;
21
- }
22
- html, body {
23
- height: 100%;
24
- margin: 0;
25
- padding: 0;
26
  }
 
27
  body {
28
- font-family: var(--font);
29
- background: var(--bg);
30
- min-height: 100vh;
 
 
31
  display: flex;
32
  flex-direction: column;
33
  align-items: center;
34
- gap: 32px;
35
- padding: 40px 0;
36
- }
37
- h1 {
38
- color: var(--primary-dark);
39
- font-size: 2.2rem;
40
- font-weight: 700;
41
- margin-bottom: 0;
42
- text-align: center;
43
- width: 100%;
44
- max-width: 700px;
45
  }
 
46
  .container {
47
- width: 100%;
48
- max-width: 700px;
 
 
 
 
 
49
  display: flex;
50
- flex-direction: column;
51
- gap: 32px;
 
 
 
 
 
 
 
 
52
  }
53
- #videoContainer {
 
54
  position: relative;
55
  width: 100%;
56
- max-width: 520px;
57
- aspect-ratio: 4/3;
58
- border-radius: var(--radius);
59
  overflow: hidden;
60
- box-shadow: var(--shadow);
61
- border: 1.5px solid var(--border);
62
- background: #000;
63
- margin: 0 auto;
64
  }
 
65
  #videoFeed {
66
  width: 100%;
67
  height: 100%;
68
  object-fit: cover;
69
- border-radius: var(--radius);
70
  display: block;
71
  }
 
72
  #loadingOverlay {
73
  position: absolute;
74
- inset: 0;
 
 
 
 
 
75
  display: none;
76
  justify-content: center;
77
  align-items: center;
78
- background: rgba(30, 41, 59, 0.7);
79
- z-index: 10;
80
- color: #fff;
81
- font-size: 1.5em;
82
- font-weight: bold;
83
- border-radius: var(--radius);
84
- text-align: right;
85
  }
86
- .io-areas {
87
- display: flex;
88
- flex-direction: column;
89
- gap: 18px;
90
- background: var(--surface);
91
- border-radius: var(--radius);
92
- box-shadow: var(--shadow);
93
- padding: 24px 28px;
94
- align-items: stretch;
95
- border: 1.5px solid var(--border);
96
- }
97
- .io-areas > div {
98
- display: flex;
99
- flex-direction: column;
100
- align-items: flex-start;
101
- gap: 6px;
102
  }
103
- label {
104
- font-weight: 600;
105
- color: var(--primary-dark);
106
- font-size: 1rem;
107
- text-align: right;
108
- width: 100%;
109
  }
110
- textarea {
 
111
  width: 100%;
112
- min-width: 320px;
113
- max-width: 600px;
114
- height: 8em;
115
- min-height: 8em;
116
- max-height: 8em;
117
- padding: 10px 14px;
118
- border: 1.5px solid var(--border);
119
- border-radius: 8px;
120
- font-size: 1rem;
121
- background: var(--bg);
122
- color: #222;
123
- resize: vertical;
124
  box-sizing: border-box;
 
 
 
125
  text-align: right;
126
- transition: border 0.2s;
127
- }
128
- textarea:focus {
129
- border-color: var(--primary);
130
- outline: none;
131
- background: #f1f5f9;
132
  }
 
133
  .controls {
134
  display: flex;
135
- gap: 18px;
 
 
 
 
 
 
136
  align-items: center;
137
- background: var(--surface);
138
- padding: 18px 28px;
139
- border-radius: var(--radius);
140
- box-shadow: var(--shadow);
141
- border: 1.5px solid var(--border);
142
- justify-content: flex-start;
143
- }
144
- .controls label {
145
- margin-bottom: 0;
146
- font-size: 1rem;
147
- color: #334155;
148
- font-weight: 500;
149
- text-align: right;
150
- width: auto;
151
  }
152
- select {
153
- padding: 8px 14px;
154
- border-radius: 8px;
155
- border: 1.5px solid var(--border);
 
 
156
  font-size: 1rem;
157
- background: var(--bg);
158
- color: #222;
159
  text-align: right;
160
- min-width: 90px;
161
- transition: border 0.2s;
162
  }
163
- select:focus {
164
- border-color: var(--primary);
165
- outline: none;
 
 
 
166
  }
167
- #startButton {
168
- padding: 10px 28px;
169
- font-size: 1.1rem;
170
- font-weight: 600;
171
- cursor: pointer;
172
  border: none;
173
- border-radius: 8px;
174
- color: #fff;
175
- transition: background 0.2s, box-shadow 0.2s;
176
- box-shadow: 0 2px 8px 0 rgba(37, 99, 235, 0.08);
 
 
 
 
177
  }
178
- #startButton.start {
179
- background: var(--success);
 
 
180
  }
181
- #startButton.stop {
182
- background: var(--danger);
 
183
  }
 
184
  .hidden {
185
  display: none;
186
  }
187
- /* Right-align all text elements */
188
- *:not(input):not(textarea) {
189
- direction: rtl;
190
- }
191
- textarea, select, input {
192
- direction: rtl;
193
- }
194
- /* Responsive */
195
- @media (max-width: 800px) {
196
- .container {
197
- max-width: 98vw;
198
- }
199
- #videoContainer {
200
- max-width: 98vw;
201
- }
202
- textarea {
203
- min-width: 0;
204
- max-width: 98vw;
205
- }
206
  }
207
- @media (max-width: 600px) {
208
- .io-areas, .controls {
209
- padding: 12px 6px;
210
- }
211
- h1 {
212
- font-size: 1.3rem;
213
- }
214
  }
215
  </style>
216
  </head>
217
  <body>
218
  <div class="container">
219
- <h1>مدل زبانی-بصری فارسی</h1>
220
- <div id="videoContainer">
 
 
 
221
  <video id="videoFeed" autoplay playsinline></video>
222
  <div id="loadingOverlay">در حال بارگذاری...</div>
223
  </div>
224
  <canvas id="canvas" class="hidden"></canvas>
225
- <div class="io-areas">
226
- <!-- <div>
227
- <label for="instructionText">دستورالعمل:</label>
228
- <textarea
229
- id="instructionText"
230
- name="Instruction"
231
- placeholder="دستورالعمل خود را وارد کنید..."
232
- ></textarea>
233
- </div> -->
234
- <div>
235
- <label for="responseText">پاسخ:</label>
236
- <textarea
237
- id="responseText"
238
- name="Response"
239
- readonly
240
- placeholder="پاسخ سرور اینجا نمایش داده می‌شود..."
241
- rows="4"
242
- cols="50"
243
- ></textarea>
244
- </div>
245
  </div>
 
246
  <div class="controls">
247
- <label for="intervalSelect">فاصله بین دو درخواست:</label>
248
- <select id="intervalSelect" name="Interval between 2 requests">
249
- <option value="0" selected>۰ میلی‌ثانیه</option>
250
- <option value="100">۱۰۰ میلی‌ثانیه</option>
251
- <option value="250">۲۵۰ میلی‌ثانیه</option>
252
- <option value="500">۵۰۰ میلی‌ثانیه</option>
253
- <option value="1000">۱ ثانیه</option>
254
- <option value="2000">۲ ثانیه</option>
255
- </select>
256
- <button id="startButton" class="start">شروع</button>
 
 
257
  </div>
258
  </div>
 
259
  <script type="module">
260
- import {
261
- AutoProcessor,
262
- AutoModelForVision2Seq,
263
- RawImage,
264
- } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers/dist/transformers.min.js";
265
 
266
- import OpenAI from "https://cdn.jsdelivr.net/npm/[email protected]/+esm"
267
 
268
- const baseURL = "https://api.avalai.ir/v1";
269
 
270
- const openai = new OpenAI({
271
- apiKey: "aa-H6NlUS0RP0RWYcNgh0eAIhsl0tBxJ1vgw4xG9M3HdFhXIS3h",
272
- baseURL: baseURL,
273
- dangerouslyAllowBrowser: true
274
- });
275
 
276
- const video = document.getElementById("videoFeed");
277
- const canvas = document.getElementById("canvas");
278
- // const instructionText = document.getElementById("instructionText");
279
- const responseText = document.getElementById("responseText");
280
- const intervalSelect = document.getElementById("intervalSelect");
281
- const startButton = document.getElementById("startButton");
282
- const loadingOverlay = document.getElementById("loadingOverlay");
283
 
284
- // instructionText.value = "What do you see?"; // default instruction (Persian)
285
- const CONTEXT = `
286
  Translate the text into persian and only return the translated text without any other text.
287
- `
288
-
289
- let stream;
290
- let isProcessing = false;
291
- let processor, model;
292
- async function initModel() {
293
- const modelId = "HuggingFaceTB/SmolVLM-500M-Instruct"; // or "HuggingFaceTB/SmolVLM-Instruct";
294
- loadingOverlay.style.display = "flex";
295
- responseText.value = "Loading processor...";
296
- processor = await AutoProcessor.from_pretrained(modelId);
297
- responseText.value = "Processor loaded. Loading model...";
298
- model = await AutoModelForVision2Seq.from_pretrained(modelId, {
299
- dtype: {
300
- embed_tokens: "fp16",
301
- vision_encoder: "q4",
302
- decoder_model_merged: "q4",
303
- },
304
- device: "webgpu",
305
- });
306
- responseText.value = "Model loaded. Initializing camera...";
307
- loadingOverlay.style.display = "none";
308
- }
309
- async function initCamera() {
310
- try {
311
- stream = await navigator.mediaDevices.getUserMedia({
312
- video: true,
313
- audio: false,
314
- });
315
- video.srcObject = stream;
316
- responseText.value = "Camera access granted. Ready to start.";
317
- } catch (err) {
318
- console.error("Error accessing camera:", err);
319
- responseText.value = `Error accessing camera: ${err.name} - ${err.message}. Please ensure permissions are granted and you are on HTTPS or localhost.`;
320
- alert(
321
- `Error accessing camera: ${err.name}. Make sure you've granted permission and are on HTTPS or localhost.`
322
- );
323
- }
324
  }
325
- function captureImage() {
326
- if (!stream || !video.videoWidth) {
327
- console.warn("Video stream not ready for capture.");
328
- return null;
329
- }
330
- canvas.width = video.videoWidth;
331
- canvas.height = video.videoHeight;
332
- const context = canvas.getContext("2d", { willReadFrequently: true });
333
- context.drawImage(video, 0, 0, canvas.width, canvas.height);
334
- const frame = context.getImageData(0, 0, canvas.width, canvas.height);
335
- return new RawImage(frame.data, frame.width, frame.height, 4);
336
  }
337
- async function runLocalVisionInference(imgElement, instruction) {
338
- const messages = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  {
340
- role: "user",
341
- content: [{ type: "image" }, { type: "text", text: instruction }],
342
  },
343
- ];
344
-
345
- const text = processor.apply_chat_template(messages, {
346
- add_generation_prompt: true,
347
- });
348
-
349
- const inputs = await processor(text, [imgElement], {
350
- do_image_splitting: false,
351
- });
352
-
353
- const generatedIds = await model.generate({
354
- ...inputs,
355
- max_new_tokens: 100,
356
- });
357
-
358
- const output = processor.batch_decode(
359
- generatedIds.slice(null, [inputs.input_ids.dims.at(-1), null]),
360
- { skip_special_tokens: true }
361
- );
362
- return output[0].trim();
363
- }
364
-
365
- async function callExternalLLmAPI(text) {
366
- let response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
367
- method: "POST",
368
- headers: {
369
- "Authorization": "Bearer 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",
370
- "HTTP-Referer": "<YOUR_SITE_URL>", // Optional. Site URL for rankings on openrouter.ai.
371
- "X-Title": "<YOUR_SITE_NAME>", // Optional. Site title for rankings on openrouter.ai.
372
- "Content-Type": "application/json"
373
  },
374
- body: JSON.stringify({
375
- "model": "qwen/qwen-2.5-72b-instruct:free",
376
- "messages": [
377
- {
378
- "role": "system",
379
- "content": CONTEXT
380
- },
381
- {
382
- "role": "user",
383
- "content": text
384
- }
385
- ]
386
- })
387
- });
388
-
389
- if (!response.ok) {
390
- throw new Error(`HTTP error! Status: ${response.status}`);
391
- }
392
-
393
- const data = await response.json();
394
- const generatedText = data.choices[0].message.content;
395
- return generatedText;
396
- }
397
 
398
- async function callExternalLLmAPI2(text) {
399
-
400
- const response = await openai.chat.completions.create({
401
- messages: [
402
- { role: "system", content: CONTEXT },
403
- { role: "user", content: text }
404
- ],
405
- model: "gpt-4o",
406
- });
407
-
408
- let generatedText = response.choices[0].message.content;
409
- generatedText = generatedText.trim();
410
- return generatedText;
411
  }
412
 
413
- async function sendData() {
414
- if (!isProcessing) return;
415
- const instruction = "What do you see?";
416
- const rawImg = captureImage();
417
- if (!rawImg) {
418
- responseText.value = "Capture failed";
419
- return;
420
- }
421
- try {
422
- const reply = await runLocalVisionInference(rawImg, instruction);
423
- const translatedReply = await callExternalLLmAPI2(reply);
424
- responseText.value = translatedReply;
425
- } catch (e) {
426
- console.error(e);
427
- responseText.value = `Error: ${e.message}`;
428
- }
 
 
 
 
 
 
 
 
 
 
429
  }
430
- function sleep(ms) {
431
- return new Promise((resolve) => setTimeout(resolve, ms));
 
 
 
 
 
432
  }
433
- async function processingLoop() {
434
- const intervalMs = parseInt(intervalSelect.value, 10);
435
- while (isProcessing) {
436
- await sendData();
437
- if (!isProcessing) break;
438
- await sleep(intervalMs);
439
- }
 
 
 
440
  }
441
- function handleStart() {
442
- if (!stream) {
443
- responseText.value = "Camera not available. Cannot start.";
444
- alert("Camera not available. Please grant permission first.");
445
- return;
446
- }
447
- isProcessing = true;
448
- startButton.textContent = "توقف";
449
- startButton.classList.replace("start", "stop");
450
- // instructionText.disabled = true;
451
- intervalSelect.disabled = true;
452
- responseText.value = "Processing started...";
453
- processingLoop();
454
  }
455
- function handleStop() {
456
- isProcessing = false;
457
- startButton.textContent = "شروع";
458
- startButton.classList.replace("stop", "start");
459
- // instructionText.disabled = false;
460
- intervalSelect.disabled = false;
461
- if (responseText.value.startsWith("Processing started...")) {
462
- responseText.value = "Processing stopped.";
463
- }
 
 
 
 
 
 
 
464
  }
465
- startButton.addEventListener("click", () => {
466
- if (isProcessing) {
467
- handleStop();
468
- } else {
469
- handleStart();
470
- }
471
- });
472
- window.addEventListener("DOMContentLoaded", async () => {
473
- // Check for WebGPU support
474
- if (!navigator.gpu) {
475
- const videoElement = document.getElementById("videoFeed");
476
- const warningElement = document.createElement("p");
477
- warningElement.textContent =
478
- "WebGPU is not available in this browser.";
479
- warningElement.style.color = "red";
480
- warningElement.style.textAlign = "center";
481
- videoElement.parentNode.insertBefore(
482
- warningElement,
483
- videoElement.nextSibling
484
- );
485
- }
486
- await initModel();
487
- await initCamera();
488
- // Set placeholders and button text to Persian on load
489
- // instructionText.placeholder = "دستورالعمل خود را وارد کنید...";
490
- responseText.placeholder = "پاسخ سرور اینجا نمایش داده می‌شود...";
491
- startButton.textContent = isProcessing ? "توقف" : "شروع";
492
- });
493
- window.addEventListener("beforeunload", () => {
494
- if (stream) {
495
- stream.getTracks().forEach((track) => track.stop());
496
- }
497
- });
498
  </script>
499
  </body>
500
  </html>
 
4
  <meta charset="UTF-8" />
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
  <title>Camera Interaction App</title>
7
+ <link
8
+ href="https://fonts.googleapis.com/css2?family=Vazirmatn:wght@400;600;700&display=swap"
9
+ rel="stylesheet"
10
+ />
11
+ <link
12
+ rel="stylesheet"
13
+ href="https://fonts.googleapis.com/icon?family=Material+Icons"
14
+ />
15
  <style>
16
  :root {
17
+ --primary-color: #6200ee;
18
+ --primary-variant: #3700b3;
19
+ --secondary-color: #03dac6;
20
+ --background-color: #fff;
21
+ --surface-color: #fff;
22
+ --error-color: #b00020;
23
+ --text-primary: #212121;
24
+ --text-secondary: #757575;
25
+ --disabled-color: #bdbdbd;
26
+ --elevation-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
27
+ --border-radius: 4px;
28
+ --font-family: 'Vazirmatn', 'Inter', 'Segoe UI', Arial, sans-serif;
 
 
 
29
  }
30
+
31
  body {
32
+ font-family: var(--font-family);
33
+ background-color: var(--background-color);
34
+ color: var(--text-primary);
35
+ margin: 0;
36
+ padding: 0;
37
  display: flex;
38
  flex-direction: column;
39
  align-items: center;
40
+ justify-content: center;
41
+ min-height: 100vh;
42
+ direction: rtl;
 
 
 
 
 
 
 
 
43
  }
44
+
45
  .container {
46
+ width: 90%;
47
+ max-width: 800px;
48
+ padding: 24px;
49
+ box-sizing: border-box;
50
+ }
51
+
52
+ .header {
53
  display: flex;
54
+ align-items: center;
55
+ justify-content: space-between;
56
+ margin-bottom: 24px;
57
+ }
58
+
59
+ .header h1 {
60
+ font-size: 1.5rem;
61
+ font-weight: 500;
62
+ color: var(--primary-color);
63
+ margin: 0;
64
  }
65
+
66
+ .video-wrapper {
67
  position: relative;
68
  width: 100%;
69
+ aspect-ratio: 4 / 3;
70
+ border-radius: var(--border-radius);
 
71
  overflow: hidden;
72
+ box-shadow: var(--elevation-shadow);
73
+ margin-bottom: 24px;
 
 
74
  }
75
+
76
  #videoFeed {
77
  width: 100%;
78
  height: 100%;
79
  object-fit: cover;
 
80
  display: block;
81
  }
82
+
83
  #loadingOverlay {
84
  position: absolute;
85
+ top: 0;
86
+ left: 0;
87
+ width: 100%;
88
+ height: 100%;
89
+ background-color: rgba(0, 0, 0, 0.5);
90
+ color: white;
91
  display: none;
92
  justify-content: center;
93
  align-items: center;
94
+ font-size: 1.2rem;
 
 
 
 
 
 
95
  }
96
+
97
+ .input-group {
98
+ margin-bottom: 16px;
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  }
100
+
101
+ .input-group label {
102
+ display: block;
103
+ margin-bottom: 8px;
104
+ color: var(--text-secondary);
 
105
  }
106
+
107
+ .input-group textarea {
108
  width: 100%;
109
+ padding: 12px;
110
+ border: 1px solid var(--disabled-color);
111
+ border-radius: var(--border-radius);
 
 
 
 
 
 
 
 
 
112
  box-sizing: border-box;
113
+ font-family: inherit;
114
+ font-size: 1rem;
115
+ resize: none;
116
  text-align: right;
 
 
 
 
 
 
117
  }
118
+
119
  .controls {
120
  display: flex;
121
+ flex-direction: column;
122
+ gap: 16px;
123
+ }
124
+
125
+ .select-wrapper {
126
+ position: relative;
127
+ display: flex;
128
  align-items: center;
129
+ border: 1px solid var(--disabled-color);
130
+ border-radius: var(--border-radius);
131
+ overflow: hidden;
132
+ background-color: var(--surface-color);
 
 
 
 
 
 
 
 
 
 
133
  }
134
+
135
+ .select-wrapper select {
136
+ padding: 12px 40px 12px 12px;
137
+ border: none;
138
+ background-color: transparent;
139
+ font-family: inherit;
140
  font-size: 1rem;
141
+ color: var(--text-primary);
142
+ appearance: none;
143
  text-align: right;
144
+ flex: 1;
 
145
  }
146
+
147
+ .select-wrapper .material-icons {
148
+ position: absolute;
149
+ left: 12px;
150
+ color: var(--text-secondary);
151
+ pointer-events: none;
152
  }
153
+
154
+ .button {
155
+ padding: 12px 24px;
 
 
156
  border: none;
157
+ border-radius: var(--border-radius);
158
+ font-family: inherit;
159
+ font-size: 1rem;
160
+ font-weight: 500;
161
+ text-transform: uppercase;
162
+ cursor: pointer;
163
+ box-shadow: var(--elevation-shadow);
164
+ transition: background-color 0.3s;
165
  }
166
+
167
+ .button.primary {
168
+ background-color: var(--primary-color);
169
+ color: white;
170
  }
171
+
172
+ .button.primary:hover {
173
+ background-color: var(--primary-variant);
174
  }
175
+
176
  .hidden {
177
  display: none;
178
  }
179
+
180
+ /* Utility Classes */
181
+ .mt-2 {
182
+ margin-top: 16px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  }
184
+
185
+ .mb-2 {
186
+ margin-bottom: 16px;
 
 
 
 
187
  }
188
  </style>
189
  </head>
190
  <body>
191
  <div class="container">
192
+ <header class="header">
193
+ <h1>مدل زبانی-بصری فارسی</h1>
194
+ </header>
195
+
196
+ <div class="video-wrapper">
197
  <video id="videoFeed" autoplay playsinline></video>
198
  <div id="loadingOverlay">در حال بارگذاری...</div>
199
  </div>
200
  <canvas id="canvas" class="hidden"></canvas>
201
+
202
+ <div class="input-group">
203
+ <label for="responseText">پاسخ:</label>
204
+ <textarea
205
+ id="responseText"
206
+ rows="4"
207
+ readonly
208
+ placeholder="پاسخ سرور اینجا نمایش داده می‌شود..."
209
+ ></textarea>
 
 
 
 
 
 
 
 
 
 
 
210
  </div>
211
+
212
  <div class="controls">
213
+ <div class="select-wrapper mb-2">
214
+ <select id="intervalSelect">
215
+ <option value="0">۰ میلی‌ثانیه</option>
216
+ <option value="100">۱۰۰ میلی‌ثانیه</option>
217
+ <option value="250">۲۵۰ میلی‌ثانیه</option>
218
+ <option value="500">۵۰۰ میلی‌ثانیه</option>
219
+ <option value="1000">۱ ثانیه</option>
220
+ <option value="2000">۲ ثانیه</option>
221
+ </select>
222
+ <i class="material-icons">arrow_drop_down</i>
223
+ </div>
224
+ <button id="startButton" class="button primary">شروع</button>
225
  </div>
226
  </div>
227
+
228
  <script type="module">
229
+ import {
230
+ AutoProcessor,
231
+ AutoModelForVision2Seq,
232
+ RawImage,
233
+ } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers/dist/transformers.min.js';
234
 
235
+ import OpenAI from 'https://cdn.jsdelivr.net/npm/[email protected]/+esm';
236
 
237
+ const baseURL = 'https://api.avalai.ir/v1';
238
 
239
+ const openai = new OpenAI({
240
+ apiKey: 'aa-H6NlUS0RP0RWYcNgh0eAIhsl0tBxJ1vgw4xG9M3HdFhXIS3h',
241
+ baseURL: baseURL,
242
+ dangerouslyAllowBrowser: true,
243
+ });
244
 
245
+ const video = document.getElementById('videoFeed');
246
+ const canvas = document.getElementById('canvas');
247
+ const responseText = document.getElementById('responseText');
248
+ const intervalSelect = document.getElementById('intervalSelect');
249
+ const startButton = document.getElementById('startButton');
250
+ const loadingOverlay = document.getElementById('loadingOverlay');
 
251
 
252
+ const CONTEXT = `
 
253
  Translate the text into persian and only return the translated text without any other text.
254
+ `;
255
+
256
+ let stream;
257
+ let isProcessing = false;
258
+ let processor, model;
259
+ async function initModel() {
260
+ const modelId = 'HuggingFaceTB/SmolVLM-500M-Instruct'; // or "HuggingFaceTB/SmolVLM-Instruct";
261
+ loadingOverlay.style.display = 'flex';
262
+ responseText.value = 'Loading processor...';
263
+ processor = await AutoProcessor.from_pretrained(modelId);
264
+ responseText.value = 'Processor loaded. Loading model...';
265
+ model = await AutoModelForVision2Seq.from_pretrained(modelId, {
266
+ dtype: {
267
+ embed_tokens: 'fp16',
268
+ vision_encoder: 'q4',
269
+ decoder_model_merged: 'q4',
270
+ },
271
+ device: 'webgpu',
272
+ });
273
+ responseText.value = 'Model loaded. Initializing camera...';
274
+ loadingOverlay.style.display = 'none';
275
+ }
276
+ async function initCamera() {
277
+ try {
278
+ stream = await navigator.mediaDevices.getUserMedia({
279
+ video: true,
280
+ audio: false,
281
+ });
282
+ video.srcObject = stream;
283
+ responseText.value = 'Camera access granted. Ready to start.';
284
+ } catch (err) {
285
+ console.error('Error accessing camera:', err);
286
+ responseText.value = `Error accessing camera: ${err.name} - ${err.message}. Please ensure permissions are granted and you are on HTTPS or localhost.`;
287
+ alert(
288
+ `Error accessing camera: ${err.name}. Make sure you've granted permission and are on HTTPS or localhost.`
289
+ );
 
290
  }
291
+ }
292
+ function captureImage() {
293
+ if (!stream || !video.videoWidth) {
294
+ console.warn('Video stream not ready for capture.');
295
+ return null;
 
 
 
 
 
 
296
  }
297
+ canvas.width = video.videoWidth;
298
+ canvas.height = video.videoHeight;
299
+ const context = canvas.getContext('2d', {
300
+ willReadFrequently: true,
301
+ });
302
+ context.drawImage(video, 0, 0, canvas.width, canvas.height);
303
+ const frame = context.getImageData(
304
+ 0,
305
+ 0,
306
+ canvas.width,
307
+ canvas.height
308
+ );
309
+ return new RawImage(frame.data, frame.width, frame.height, 4);
310
+ }
311
+ async function runLocalVisionInference(imgElement, instruction) {
312
+ const messages = [
313
+ {
314
+ role: 'user',
315
+ content: [{ type: 'image' }, { type: 'text', text: instruction }],
316
+ },
317
+ ];
318
+
319
+ const text = processor.apply_chat_template(messages, {
320
+ add_generation_prompt: true,
321
+ });
322
+
323
+ const inputs = await processor(text, [imgElement], {
324
+ do_image_splitting: false,
325
+ });
326
+
327
+ const generatedIds = await model.generate({
328
+ ...inputs,
329
+ max_new_tokens: 100,
330
+ });
331
+
332
+ const output = processor.batch_decode(
333
+ generatedIds.slice(null, [inputs.input_ids.dims.at(-1), null]),
334
+ { skip_special_tokens: true }
335
+ );
336
+ return output[0].trim();
337
+ }
338
+
339
+ async function callExternalLLmAPI(text) {
340
+ let response = await fetch(
341
+ 'https://openrouter.ai/api/v1/chat/completions',
342
+ {
343
+ method: 'POST',
344
+ headers: {
345
+ Authorization:
346
+ 'Bearer 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',
347
+ 'HTTP-Referer': '<YOUR_SITE_URL>', // Optional. Site URL for rankings on openrouter.ai.
348
+ 'X-Title': '<YOUR_SITE_NAME>', // Optional. Site title for rankings on openrouter.ai.
349
+ 'Content-Type': 'application/json',
350
+ },
351
+ body: JSON.stringify({
352
+ model: 'qwen/qwen-2.5-72b-instruct:free',
353
+ messages: [
354
  {
355
+ role: 'system',
356
+ content: CONTEXT,
357
  },
358
+ {
359
+ role: 'user',
360
+ content: text,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361
  },
362
+ ],
363
+ }),
364
+ }
365
+ );
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
+ if (!response.ok) {
368
+ throw new Error(`HTTP error! Status: ${response.status}`);
 
 
 
 
 
 
 
 
 
 
 
369
  }
370
 
371
+ const data = await response.json();
372
+ const generatedText = data.choices[0].message.content;
373
+ return generatedText;
374
+ }
375
+
376
+ async function callExternalLLmAPI2(text) {
377
+ const response = await openai.chat.completions.create({
378
+ messages: [
379
+ { role: 'system', content: CONTEXT },
380
+ { role: 'user', content: text },
381
+ ],
382
+ model: 'gpt-4o',
383
+ });
384
+
385
+ let generatedText = response.choices[0].message.content;
386
+ generatedText = generatedText.trim();
387
+ return generatedText;
388
+ }
389
+
390
+ async function sendData() {
391
+ if (!isProcessing) return;
392
+ const instruction = 'What do you see?';
393
+ const rawImg = captureImage();
394
+ if (!rawImg) {
395
+ responseText.value = 'Capture failed';
396
+ return;
397
  }
398
+ try {
399
+ const reply = await runLocalVisionInference(rawImg, instruction);
400
+ const translatedReply = await callExternalLLmAPI2(reply);
401
+ responseText.value = translatedReply;
402
+ } catch (e) {
403
+ console.error(e);
404
+ responseText.value = `Error: ${e.message}`;
405
  }
406
+ }
407
+ function sleep(ms) {
408
+ return new Promise(resolve => setTimeout(resolve, ms));
409
+ }
410
+ async function processingLoop() {
411
+ const intervalMs = parseInt(intervalSelect.value, 10);
412
+ while (isProcessing) {
413
+ await sendData();
414
+ if (!isProcessing) break;
415
+ await sleep(intervalMs);
416
  }
417
+ }
418
+ function handleStart() {
419
+ if (!stream) {
420
+ responseText.value = 'Camera not available. Cannot start.';
421
+ alert('Camera not available. Please grant permission first.');
422
+ return;
 
 
 
 
 
 
 
423
  }
424
+ isProcessing = true;
425
+ startButton.textContent = 'توقف';
426
+ startButton.classList.add('running');
427
+ startButton.classList.remove('primary');
428
+ intervalSelect.disabled = true;
429
+ responseText.value = 'Processing started...';
430
+ processingLoop();
431
+ }
432
+ function handleStop() {
433
+ isProcessing = false;
434
+ startButton.textContent = 'شروع';
435
+ startButton.classList.remove('running');
436
+ startButton.classList.add('primary');
437
+ intervalSelect.disabled = false;
438
+ if (responseText.value.startsWith('Processing started...')) {
439
+ responseText.value = 'Processing stopped.';
440
  }
441
+ }
442
+ startButton.addEventListener('click', () => {
443
+ if (isProcessing) {
444
+ handleStop();
445
+ } else {
446
+ handleStart();
447
+ }
448
+ });
449
+ window.addEventListener('DOMContentLoaded', async () => {
450
+ if (!navigator.gpu) {
451
+ const videoElement = document.getElementById('videoFeed');
452
+ const warningElement = document.createElement('p');
453
+ warningElement.textContent = 'WebGPU is not available in this browser.';
454
+ warningElement.style.color = 'red';
455
+ warningElement.style.textAlign = 'center';
456
+ videoElement.parentNode.insertBefore(
457
+ warningElement,
458
+ videoElement.nextSibling
459
+ );
460
+ }
461
+ await initModel();
462
+ await initCamera();
463
+ responseText.placeholder = 'پاسخ سرور اینجا نمایش داده می‌شود...';
464
+ startButton.textContent = isProcessing ? 'توقف' : 'شروع';
465
+ });
466
+ window.addEventListener('beforeunload', () => {
467
+ if (stream) {
468
+ stream.getTracks().forEach(track => track.stop());
469
+ }
470
+ });
 
 
 
471
  </script>
472
  </body>
473
  </html>