pvanand committed on
Commit
7225716
·
verified ·
1 Parent(s): 78b1a19

Update public/index.html

Browse files
Files changed (1) hide show
  1. public/index.html +135 -257
public/index.html CHANGED
@@ -1,192 +1,3 @@
1
- <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8"/>
5
- <title>HTML Compressor for LLM</title>
6
- <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/themes/prism.min.css">
7
- <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/plugins/toolbar/prism-toolbar.min.css">
8
- <style>
9
- :root {
10
- --primary-color: #007bff;
11
- --secondary-color: #6c757d;
12
- --success-color: #28a745;
13
- --border-color: #dee2e6;
14
- --background-color: #f8f9fa;
15
- }
16
- body {
17
- font-family: system-ui, -apple-system, sans-serif;
18
- line-height: 1.6;
19
- margin: 0;
20
- padding: 20px;
21
- background: var(--background-color);
22
- }
23
- .container {
24
- max-width: 1200px;
25
- margin: 0 auto;
26
- background: white;
27
- padding: 30px;
28
- border-radius: 8px;
29
- box-shadow: 0 2px 4px rgba(0,0,0,0.1);
30
- }
31
- textarea {
32
- width: 100%;
33
- height: 200px;
34
- padding: 12px;
35
- border: 1px solid var(--border-color);
36
- border-radius: 4px;
37
- font-family: 'Monaco', 'Menlo', monospace;
38
- font-size: 14px;
39
- resize: vertical;
40
- margin-bottom: 15px;
41
- }
42
- .options-container {
43
- background: var(--background-color);
44
- padding: 20px;
45
- border-radius: 8px;
46
- margin: 20px 0;
47
- }
48
- .option-grid {
49
- display: grid;
50
- grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
51
- gap: 15px;
52
- }
53
- .option-item {
54
- display: flex;
55
- align-items: center;
56
- gap: 10px;
57
- }
58
- .button-group {
59
- display: flex;
60
- gap: 10px;
61
- margin: 15px 0;
62
- }
63
- button {
64
- background: var(--primary-color);
65
- color: white;
66
- padding: 8px 16px;
67
- border: none;
68
- border-radius: 4px;
69
- cursor: pointer;
70
- transition: background 0.2s;
71
- }
72
- button:hover {
73
- background: #0056b3;
74
- }
75
- .results-container {
76
- margin-top: 30px;
77
- }
78
- .results-tabs {
79
- display: flex;
80
- gap: 10px;
81
- margin-bottom: 15px;
82
- }
83
- .tab {
84
- padding: 8px 16px;
85
- cursor: pointer;
86
- border: 1px solid var(--border-color);
87
- border-radius: 4px;
88
- transition: all 0.2s;
89
- }
90
- .tab.active {
91
- background: var(--primary-color);
92
- color: white;
93
- }
94
- .result-panel {
95
- border: 1px solid var(--border-color);
96
- border-radius: 4px;
97
- overflow: hidden;
98
- }
99
- .result-header {
100
- display: flex;
101
- justify-content: space-between;
102
- align-items: center;
103
- padding: 10px;
104
- background: var(--background-color);
105
- border-bottom: 1px solid var(--border-color);
106
- }
107
- .result-content {
108
- padding: 15px;
109
- overflow: auto;
110
- max-height: 500px;
111
- }
112
- .stats-grid {
113
- display: grid;
114
- grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
115
- gap: 15px;
116
- margin: 20px 0;
117
- }
118
- .stat-item {
119
- background: white;
120
- padding: 15px;
121
- border-radius: 4px;
122
- border: 1px solid var(--border-color);
123
- }
124
- .stat-value {
125
- font-size: 1.2em;
126
- font-weight: bold;
127
- color: var(--primary-color);
128
- }
129
- .copy-feedback {
130
- position: fixed;
131
- bottom: 20px;
132
- right: 20px;
133
- background: var(--success-color);
134
- color: white;
135
- padding: 10px 20px;
136
- border-radius: 4px;
137
- display: none;
138
- }
139
- .operation-status {
140
- margin: 20px 0;
141
- padding: 15px;
142
- border: 1px solid var(--border-color);
143
- border-radius: 4px;
144
- }
145
- .status-grid {
146
- display: grid;
147
- grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
148
- gap: 10px;
149
- margin-top: 10px;
150
- }
151
- .status-item {
152
- display: flex;
153
- align-items: center;
154
- gap: 8px;
155
- padding: 8px;
156
- border-radius: 4px;
157
- background: var(--background-color);
158
- }
159
- .status-icon {
160
- width: 20px;
161
- height: 20px;
162
- border-radius: 50%;
163
- display: flex;
164
- align-items: center;
165
- justify-content: center;
166
- color: white;
167
- font-size: 12px;
168
- }
169
- .status-success {
170
- background: var(--success-color);
171
- }
172
- .status-error {
173
- background: #dc3545;
174
- }
175
- .status-message {
176
- font-size: 0.9em;
177
- color: #666;
178
- margin-top: 4px;
179
- }
180
- pre {
181
- margin: 0;
182
- border-radius: 4px;
183
- }
184
- code {
185
- font-family: 'Monaco', 'Menlo', monospace;
186
- font-size: 14px;
187
- }
188
- </style>
189
- </head>
190
  <body>
191
  <div class="container">
192
  <h1>HTML Compressor for LLM</h1>
@@ -237,6 +48,18 @@
237
  </div>
238
  </div>
239
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  <div class="button-group">
241
  <input type="file" accept=".html,.htm" id="fileInput">
242
  <button type="submit">Process HTML</button>
@@ -254,7 +77,7 @@
254
  <div class="results-tabs">
255
  <div class="tab active" data-view="html">Compressed HTML</div>
256
  <div class="tab" data-view="json">JSON Structure</div>
257
- <div class="tab" data-view="extracted">Extracted Data</div>
258
  </div>
259
 
260
  <div class="result-panel" id="htmlView">
@@ -283,16 +106,16 @@
283
  </div>
284
  </div>
285
 
286
- <div class="result-panel" id="extractedView" style="display: none;">
287
  <div class="result-header">
288
- <h3>Extracted Data</h3>
289
  <div class="button-group">
290
- <button onclick="copyResult('extracted')">Copy</button>
291
- <button onclick="downloadResult('extracted')">Download</button>
292
  </div>
293
  </div>
294
  <div class="result-content">
295
- <pre><code class="language-json" id="extractedOutput"></code></pre>
296
  </div>
297
  </div>
298
  </div>
@@ -304,6 +127,11 @@
304
  <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-markup.min.js"></script>
305
  <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-json.min.js"></script>
306
  <script>
 
 
 
 
 
307
  const form = document.getElementById('compressorForm');
308
  const fileInput = document.getElementById('fileInput');
309
  const htmlInput = document.getElementById('htmlInput');
@@ -314,12 +142,15 @@
314
  // Tab switching
315
  document.querySelectorAll('.tab').forEach(tab => {
316
  tab.addEventListener('click', () => {
 
317
  document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
318
  tab.classList.add('active');
 
 
319
  const view = tab.dataset.view;
320
  document.getElementById('htmlView').style.display = view === 'html' ? 'block' : 'none';
321
  document.getElementById('jsonView').style.display = view === 'json' ? 'block' : 'none';
322
- document.getElementById('extractedView').style.display = view === 'extracted' ? 'block' : 'none';
323
  });
324
  });
325
 
@@ -333,35 +164,115 @@
333
  }
334
  });
335
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  // Form submission
337
  form.addEventListener('submit', async (e) => {
338
  e.preventDefault();
 
339
  const formData = new FormData(form);
340
-
 
341
  document.querySelectorAll('input[type="checkbox"]').forEach(checkbox => {
342
  formData.set(checkbox.name, checkbox.checked);
343
  });
344
-
345
  try {
346
- // Process HTML compression
347
  const response = await fetch('/process', {
348
  method: 'POST',
349
  body: formData,
350
  });
 
351
  const data = await response.json();
352
-
353
  if (data.error) {
354
  alert(data.error);
355
  return;
356
  }
357
 
358
- // Update UI elements
359
  const statusContainer = document.querySelector('#operationStatus');
360
  const statusGrid = statusContainer.querySelector('.status-grid');
361
  statusContainer.style.display = 'block';
 
362
  statusGrid.innerHTML = Object.entries(data.operationStatus)
363
- .map(([operation, status]) =>
364
- `<div class="status-item">
365
  <div class="status-icon ${status.success ? 'status-success' : 'status-error'}">
366
  ${status.success ? '✓' : '✗'}
367
  </div>
@@ -369,66 +280,37 @@
369
  <div>${formatLabel(operation)}</div>
370
  ${status.error ? `<div class="status-message">Error: ${status.error}</div>` : ''}
371
  </div>
372
- </div>`
373
- ).join('');
374
 
 
375
  statsContainer.style.display = 'grid';
376
  statsContainer.innerHTML = Object.entries(data.stats)
377
- .map(([key, value]) =>
378
- `<div class="stat-item">
379
  <div class="stat-label">${formatLabel(key)}</div>
380
  <div class="stat-value">${value}</div>
381
- </div>`
382
- ).join('');
383
 
 
384
  resultsContainer.style.display = 'block';
 
 
385
  document.getElementById('htmlOutput').textContent = data.result.html;
386
  document.getElementById('jsonOutput').textContent = data.result.json;
 
 
387
  Prism.highlightAll();
388
-
389
- // Generate Cheerio script
390
- const scriptResponse = await fetch(
391
- 'https://elevatics-ai-web-scraper-chat.hf.space/api/v1/generate-cheerio-script',
392
- {
393
- method: 'POST',
394
- headers: {
395
- 'accept': 'application/json',
396
- 'X-API-Key': 'ae54a922-ed3a-4634-be4a-4e4dd470800a',
397
- 'Content-Type': 'application/json'
398
- },
399
- body: JSON.stringify({
400
- html: data.result.html,
401
- user_input: "",
402
- api_key: ""
403
- })
404
- }
405
- );
406
-
407
- const scriptData = await scriptResponse.json();
408
- if (scriptData.status !== 'success') {
409
- throw new Error('Script generation failed: ' + scriptData.message);
410
- }
411
-
412
- // Extract data using generated script
413
- const extractResponse = await fetch('/extract', {
414
- method: 'POST',
415
- headers: { 'Content-Type': 'application/json' },
416
- body: JSON.stringify({
417
- html: data.result.html,
418
- script: scriptData.cheerio_script
419
- })
420
- });
421
-
422
- const extractedData = await extractResponse.json();
423
- document.getElementById('extractedOutput').textContent =
424
- JSON.stringify(extractedData, null, 2);
425
- Prism.highlightAll();
426
-
427
  } catch (err) {
428
- alert(`Processing error: ${err.message}`);
429
  }
430
  });
431
 
 
 
 
 
432
  // Utility functions
433
  function formatLabel(key) {
434
  return key
@@ -440,8 +322,7 @@
440
  }
441
 
442
  async function copyResult(type) {
443
- const elementId = `${type}Output`;
444
- const content = document.getElementById(elementId).textContent;
445
  try {
446
  await navigator.clipboard.writeText(content);
447
  showCopyFeedback();
@@ -451,14 +332,12 @@
451
  }
452
 
453
  function downloadResult(type) {
454
- const elementId = `${type}Output`;
455
- const content = document.getElementById(elementId).textContent;
456
- const ext = type === 'extracted' ? 'json' : type;
457
  const blob = new Blob([content], { type: 'text/plain' });
458
  const url = URL.createObjectURL(blob);
459
  const a = document.createElement('a');
460
  a.href = url;
461
- a.download = `compressed.${ext}`;
462
  document.body.appendChild(a);
463
  a.click();
464
  document.body.removeChild(a);
@@ -472,5 +351,4 @@
472
  }, 2000);
473
  }
474
  </script>
475
- </body>
476
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  <body>
2
  <div class="container">
3
  <h1>HTML Compressor for LLM</h1>
 
48
  </div>
49
  </div>
50
 
51
+ <div class="extraction-container">
52
+ <h3>Data Extraction</h3>
53
+ <textarea
54
+ id="extractionQuery"
55
+ placeholder="Enter your extraction query (e.g., 'extract product title and price')"
56
+ ></textarea>
57
+ <div class="button-group">
58
+ <button type="button" id="generateScript">Generate Extraction Script</button>
59
+ <button type="button" id="executeExtraction" disabled>Execute Extraction</button>
60
+ </div>
61
+ </div>
62
+
63
  <div class="button-group">
64
  <input type="file" accept=".html,.htm" id="fileInput">
65
  <button type="submit">Process HTML</button>
 
77
  <div class="results-tabs">
78
  <div class="tab active" data-view="html">Compressed HTML</div>
79
  <div class="tab" data-view="json">JSON Structure</div>
80
+ <div class="tab" data-view="extraction">Extraction Results</div>
81
  </div>
82
 
83
  <div class="result-panel" id="htmlView">
 
106
  </div>
107
  </div>
108
 
109
+ <div class="result-panel" id="extractionView" style="display: none;">
110
  <div class="result-header">
111
+ <h3>Extraction Results</h3>
112
  <div class="button-group">
113
+ <button onclick="copyResult('extraction')">Copy</button>
114
+ <button onclick="downloadResult('extraction')">Download</button>
115
  </div>
116
  </div>
117
  <div class="result-content">
118
+ <pre><code class="language-json" id="extractionOutput"></code></pre>
119
  </div>
120
  </div>
121
  </div>
 
127
  <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-markup.min.js"></script>
128
  <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-json.min.js"></script>
129
  <script>
130
+ const API_URL = 'https://elevatics-ai-web-scraper-chat.hf.space/api/v1/generate-cheerio-script';
131
+ const API_KEY = 'ae54a922-ed3a-4634-be4a-4e4dd470800a';
132
+
133
+ let currentCheerioScript = null;
134
+
135
  const form = document.getElementById('compressorForm');
136
  const fileInput = document.getElementById('fileInput');
137
  const htmlInput = document.getElementById('htmlInput');
 
142
  // Tab switching
143
  document.querySelectorAll('.tab').forEach(tab => {
144
  tab.addEventListener('click', () => {
145
+ // Update tabs
146
  document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
147
  tab.classList.add('active');
148
+
149
+ // Update views
150
  const view = tab.dataset.view;
151
  document.getElementById('htmlView').style.display = view === 'html' ? 'block' : 'none';
152
  document.getElementById('jsonView').style.display = view === 'json' ? 'block' : 'none';
153
+ document.getElementById('extractionView').style.display = view === 'extraction' ? 'block' : 'none';
154
  });
155
  });
156
 
 
164
  }
165
  });
166
 
167
+ // Cheerio script generation
168
+ async function generateCheerioScript() {
169
+ const htmlContent = document.getElementById('htmlOutput').textContent;
170
+ const userInput = document.getElementById('extractionQuery').value;
171
+
172
+ if (!htmlContent || !userInput) {
173
+ alert('Please process HTML and enter an extraction query first');
174
+ return;
175
+ }
176
+
177
+ try {
178
+ const response = await fetch(API_URL, {
179
+ method: 'POST',
180
+ headers: {
181
+ 'accept': 'application/json',
182
+ 'x-api-key': API_KEY,
183
+ 'Content-Type': 'application/json',
184
+ },
185
+ body: JSON.stringify({
186
+ html: htmlContent,
187
+ user_input: userInput
188
+ })
189
+ });
190
+
191
+ const data = await response.json();
192
+
193
+ if (data.status === 'success') {
194
+ currentCheerioScript = data.cheerio_script;
195
+ document.getElementById('executeExtraction').disabled = false;
196
+
197
+ // Show the script in the JSON view
198
+ document.getElementById('jsonOutput').textContent = JSON.stringify({
199
+ cheerio_script: currentCheerioScript
200
+ }, null, 2);
201
+ Prism.highlightAll();
202
+ } else {
203
+ alert('Failed to generate extraction script');
204
+ }
205
+ } catch (error) {
206
+ alert('Error generating script: ' + error.message);
207
+ }
208
+ }
209
+
210
+ // Execute extraction
211
+ async function executeExtraction() {
212
+ if (!currentCheerioScript) {
213
+ alert('Please generate an extraction script first');
214
+ return;
215
+ }
216
+
217
+ const htmlContent = document.getElementById('htmlOutput').textContent;
218
+
219
+ try {
220
+ const response = await fetch('/extract', {
221
+ method: 'POST',
222
+ headers: {
223
+ 'Content-Type': 'application/json'
224
+ },
225
+ body: JSON.stringify({
226
+ html: htmlContent,
227
+ script: currentCheerioScript
228
+ })
229
+ });
230
+
231
+ const data = await response.json();
232
+
233
+ document.getElementById('extractionOutput').textContent =
234
+ JSON.stringify(data, null, 2);
235
+ Prism.highlightAll();
236
+
237
+ // Switch to extraction view
238
+ document.querySelector('[data-view="extraction"]').click();
239
+ } catch (error) {
240
+ alert('Error executing extraction: ' + error.message);
241
+ }
242
+ }
243
+
244
  // Form submission
245
  form.addEventListener('submit', async (e) => {
246
  e.preventDefault();
247
+
248
  const formData = new FormData(form);
249
+
250
+ // Add checkbox states
251
  document.querySelectorAll('input[type="checkbox"]').forEach(checkbox => {
252
  formData.set(checkbox.name, checkbox.checked);
253
  });
254
+
255
  try {
 
256
  const response = await fetch('/process', {
257
  method: 'POST',
258
  body: formData,
259
  });
260
+
261
  const data = await response.json();
262
+
263
  if (data.error) {
264
  alert(data.error);
265
  return;
266
  }
267
 
268
+ // Display operation status
269
  const statusContainer = document.querySelector('#operationStatus');
270
  const statusGrid = statusContainer.querySelector('.status-grid');
271
  statusContainer.style.display = 'block';
272
+
273
  statusGrid.innerHTML = Object.entries(data.operationStatus)
274
+ .map(([operation, status]) => `
275
+ <div class="status-item">
276
  <div class="status-icon ${status.success ? 'status-success' : 'status-error'}">
277
  ${status.success ? '✓' : '✗'}
278
  </div>
 
280
  <div>${formatLabel(operation)}</div>
281
  ${status.error ? `<div class="status-message">Error: ${status.error}</div>` : ''}
282
  </div>
283
+ </div>
284
+ `).join('');
285
 
286
+ // Display stats
287
  statsContainer.style.display = 'grid';
288
  statsContainer.innerHTML = Object.entries(data.stats)
289
+ .map(([key, value]) => `
290
+ <div class="stat-item">
291
  <div class="stat-label">${formatLabel(key)}</div>
292
  <div class="stat-value">${value}</div>
293
+ </div>
294
+ `).join('');
295
 
296
+ // Show results container
297
  resultsContainer.style.display = 'block';
298
+
299
+ // Update outputs with syntax highlighting
300
  document.getElementById('htmlOutput').textContent = data.result.html;
301
  document.getElementById('jsonOutput').textContent = data.result.json;
302
+
303
+ // Trigger Prism highlighting
304
  Prism.highlightAll();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  } catch (err) {
306
+ alert('Error processing HTML: ' + err.message);
307
  }
308
  });
309
 
310
+ // Event listeners for extraction
311
+ document.getElementById('generateScript').addEventListener('click', generateCheerioScript);
312
+ document.getElementById('executeExtraction').addEventListener('click', executeExtraction);
313
+
314
  // Utility functions
315
  function formatLabel(key) {
316
  return key
 
322
  }
323
 
324
  async function copyResult(type) {
325
+ const content = document.getElementById(`${type}Output`).textContent;
 
326
  try {
327
  await navigator.clipboard.writeText(content);
328
  showCopyFeedback();
 
332
  }
333
 
334
  function downloadResult(type) {
335
+ const content = document.getElementById(`${type}Output`).textContent;
 
 
336
  const blob = new Blob([content], { type: 'text/plain' });
337
  const url = URL.createObjectURL(blob);
338
  const a = document.createElement('a');
339
  a.href = url;
340
+ a.download = `compressed.${type}`;
341
  document.body.appendChild(a);
342
  a.click();
343
  document.body.removeChild(a);
 
351
  }, 2000);
352
  }
353
  </script>
354
+ </body>