pvanand committed on
Commit
2f35e5f
·
verified ·
1 Parent(s): f7737bb

Update public/index.html

Browse files
Files changed (1) hide show
  1. public/index.html +118 -515
public/index.html CHANGED
@@ -49,542 +49,145 @@
49
  border-radius: 8px;
50
  margin: 20px 0;
51
  }
52
-
53
- .option-grid {
54
- display: grid;
55
- grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
56
- gap: 15px;
57
- }
58
-
59
- .option-item {
60
- display: flex;
61
- align-items: center;
62
- gap: 10px;
63
- }
64
-
65
- .button-group {
66
- display: flex;
67
- gap: 10px;
68
- margin: 15px 0;
69
- }
70
-
71
- button {
72
- background: var(--primary-color);
73
- color: white;
74
- padding: 8px 16px;
75
- border: none;
76
- border-radius: 4px;
77
- cursor: pointer;
78
- transition: background 0.2s;
79
- }
80
-
81
- button:hover {
82
- background: #0056b3;
83
- }
84
-
85
- .results-container {
86
- margin-top: 30px;
87
- }
88
-
89
- .results-tabs {
90
- display: flex;
91
- gap: 10px;
92
- margin-bottom: 15px;
93
- }
94
-
95
- .tab {
96
- padding: 8px 16px;
97
- cursor: pointer;
98
- border: 1px solid var(--border-color);
99
- border-radius: 4px;
100
- transition: all 0.2s;
101
- }
102
-
103
- .tab.active {
104
- background: var(--primary-color);
105
- color: white;
106
- }
107
-
108
- .result-panel {
109
- border: 1px solid var(--border-color);
110
- border-radius: 4px;
111
- overflow: hidden;
112
- }
113
-
114
- .result-header {
115
- display: flex;
116
- justify-content: space-between;
117
- align-items: center;
118
- padding: 10px;
119
- background: var(--background-color);
120
- border-bottom: 1px solid var(--border-color);
121
- }
122
-
123
- .result-content {
124
- padding: 15px;
125
- overflow: auto;
126
- max-height: 500px;
127
- }
128
-
129
- .stats-grid {
130
- display: grid;
131
- grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
132
- gap: 15px;
133
- margin: 20px 0;
134
- }
135
-
136
- .stat-item {
137
- background: white;
138
- padding: 15px;
139
- border-radius: 4px;
140
- border: 1px solid var(--border-color);
141
- }
142
-
143
- .stat-value {
144
- font-size: 1.2em;
145
- font-weight: bold;
146
- color: var(--primary-color);
147
- }
148
-
149
- .copy-feedback {
150
- position: fixed;
151
- bottom: 20px;
152
- right: 20px;
153
- background: var(--success-color);
154
- color: white;
155
- padding: 10px 20px;
156
- border-radius: 4px;
157
- display: none;
158
- }
159
-
160
- .operation-status {
161
- margin: 20px 0;
162
- padding: 15px;
163
- border: 1px solid var(--border-color);
164
- border-radius: 4px;
165
- }
166
-
167
- .status-grid {
168
- display: grid;
169
- grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
170
- gap: 10px;
171
- margin-top: 10px;
172
- }
173
-
174
- .status-item {
175
- display: flex;
176
- align-items: center;
177
- gap: 8px;
178
- padding: 8px;
179
- border-radius: 4px;
180
- background: var(--background-color);
181
  }
182
 
183
- .status-icon {
184
- width: 20px;
185
- height: 20px;
186
- border-radius: 50%;
187
- display: flex;
188
- align-items: center;
189
- justify-content: center;
190
- color: white;
191
- font-size: 12px;
192
  }
193
 
194
- .status-success {
195
- background: var(--success-color);
 
 
 
 
196
  }
197
 
198
- .status-error {
199
- background: #dc3545;
 
 
 
200
  }
201
 
202
- .status-message {
203
- font-size: 0.9em;
204
- color: #666;
205
- margin-top: 4px;
 
 
206
  }
207
-
208
- pre {
209
- margin: 0;
210
- border-radius: 4px;
211
- }
212
-
213
- code {
214
- font-family: 'Monaco', 'Menlo', monospace;
215
- font-size: 14px;
216
- }
217
- /* Add to the existing style section */
218
- .extraction-container {
219
- margin: 20px 0;
220
- padding: 20px;
221
- background: var(--background-color);
222
- border-radius: 8px;
223
- }
224
-
225
- .extraction-container textarea {
226
- height: 100px;
227
- margin-bottom: 10px;
228
- }
229
-
230
- #executeExtraction:disabled {
231
- background: var(--secondary-color);
232
- cursor: not-allowed;
233
- }
234
- </style>
235
  </head>
236
  <body>
237
  <div class="container">
238
- <h1>HTML Compressor for LLM</h1>
239
- <p>Compress HTML content for optimal LLM processing while preserving essential structure.</p>
240
-
241
- <form id="compressorForm">
242
- <textarea
243
- name="html"
244
- id="htmlInput"
245
- placeholder="Paste your HTML here or upload a file..."
246
- ></textarea>
247
-
248
- <div class="options-container">
249
- <h3>Compression Options</h3>
250
- <div class="option-grid">
251
- <div class="option-item">
252
- <input type="checkbox" id="cleanHead" name="cleanHead" checked>
253
- <label for="cleanHead">Clean head section</label>
254
- </div>
255
- <div class="option-item">
256
- <input type="checkbox" id="removeScripts" name="removeScripts" checked>
257
- <label for="removeScripts">Remove scripts</label>
258
- </div>
259
- <div class="option-item">
260
- <input type="checkbox" id="removeStyles" name="removeStyles" checked>
261
- <label for="removeStyles">Remove styles</label>
262
- </div>
263
- <div class="option-item">
264
- <input type="checkbox" id="handleRepeatingElements" name="handleRepeatingElements" checked>
265
- <label for="handleRepeatingElements">Handle repeating elements</label>
266
- </div>
267
- <div class="option-item">
268
- <input type="checkbox" id="truncateText" name="truncateText" checked>
269
- <label for="truncateText">Truncate text</label>
270
- </div>
271
- <div class="option-item">
272
- <label for="truncateLength">Max text length:</label>
273
- <input type="number" id="truncateLength" name="truncateLength" value="100" min="10" max="1000">
274
- </div>
275
- <div class="option-item">
276
- <input type="checkbox" id="minifyHtml" name="minifyHtml" checked>
277
- <label for="minifyHtml">Minify HTML</label>
278
- </div>
279
- <div class="option-item">
280
- <input type="checkbox" id="removeMedia" name="removeMedia" checked>
281
- <label for="removeMedia">Remove media</label>
282
- </div>
283
- </div>
284
- </div>
285
-
286
- <div class="extraction-container">
287
- <h3>Data Extraction</h3>
288
- <textarea
289
- id="extractionQuery"
290
- placeholder="Enter your extraction query (e.g., 'extract product title and price')"
291
- ></textarea>
292
- <div class="button-group">
293
- <button type="button" id="generateScript">Generate Extraction Script</button>
294
- <button type="button" id="executeExtraction" disabled>Execute Extraction</button>
295
- </div>
296
- </div>
297
-
298
- <div class="button-group">
299
- <input type="file" accept=".html,.htm" id="fileInput">
300
- <button type="submit">Process HTML</button>
301
- </div>
302
- </form>
303
-
304
- <div id="operationStatus" class="operation-status" style="display: none;">
305
- <h3>Operation Status</h3>
306
- <div class="status-grid"></div>
307
- </div>
308
-
309
- <div id="stats" class="stats-grid" style="display: none;"></div>
310
-
311
- <div class="results-container" style="display: none;">
312
- <div class="results-tabs">
313
- <div class="tab active" data-view="html">Compressed HTML</div>
314
- <div class="tab" data-view="json">JSON Structure</div>
315
- <div class="tab" data-view="extraction">Extraction Results</div>
316
- </div>
317
-
318
- <div class="result-panel" id="htmlView">
319
- <div class="result-header">
320
- <h3>HTML Output</h3>
321
  <div class="button-group">
322
- <button onclick="copyResult('html')">Copy</button>
323
- <button onclick="downloadResult('html')">Download</button>
324
  </div>
325
- </div>
326
- <div class="result-content">
327
- <pre><code class="language-html" id="htmlOutput"></code></pre>
328
- </div>
329
  </div>
330
-
331
- <div class="result-panel" id="jsonView" style="display: none;">
332
- <div class="result-header">
333
- <h3>JSON Structure</h3>
334
- <div class="button-group">
335
- <button onclick="copyResult('json')">Copy</button>
336
- <button onclick="downloadResult('json')">Download</button>
337
- </div>
338
- </div>
339
- <div class="result-content">
340
- <pre><code class="language-json" id="jsonOutput"></code></pre>
341
- </div>
342
  </div>
343
-
344
- <div class="result-panel" id="extractionView" style="display: none;">
345
- <div class="result-header">
346
- <h3>Extraction Results</h3>
347
- <div class="button-group">
348
- <button onclick="copyResult('extraction')">Copy</button>
349
- <button onclick="downloadResult('extraction')">Download</button>
350
- </div>
351
- </div>
352
- <div class="result-content">
353
  <pre><code class="language-json" id="extractionOutput"></code></pre>
354
- </div>
355
  </div>
356
- </div>
357
-
358
- <div class="copy-feedback">Copied to clipboard!</div>
359
  </div>
360
-
361
- <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/prism.min.js"></script>
362
- <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-markup.min.js"></script>
363
- <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-json.min.js"></script>
364
  <script>
365
- const API_URL = 'https://elevatics-ai-web-scraper-chat.hf.space/api/v1/generate-cheerio-script';
366
- const API_KEY = 'ae54a922-ed3a-4634-be4a-4e4dd470800a';
367
-
368
- let currentCheerioScript = null;
369
-
370
- const form = document.getElementById('compressorForm');
371
- const fileInput = document.getElementById('fileInput');
372
- const htmlInput = document.getElementById('htmlInput');
373
- const resultsContainer = document.querySelector('.results-container');
374
- const statsContainer = document.getElementById('stats');
375
- const copyFeedback = document.querySelector('.copy-feedback');
376
-
377
- // Tab switching
378
- document.querySelectorAll('.tab').forEach(tab => {
379
- tab.addEventListener('click', () => {
380
- // Update tabs
381
- document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
382
- tab.classList.add('active');
383
-
384
- // Update views
385
- const view = tab.dataset.view;
386
- document.getElementById('htmlView').style.display = view === 'html' ? 'block' : 'none';
387
- document.getElementById('jsonView').style.display = view === 'json' ? 'block' : 'none';
388
- document.getElementById('extractionView').style.display = view === 'extraction' ? 'block' : 'none';
 
 
 
 
 
 
 
 
 
 
 
 
389
  });
390
- });
391
-
392
- // File input handler
393
- fileInput.addEventListener('change', (e) => {
394
- const file = e.target.files[0];
395
- if (file) {
396
- const reader = new FileReader();
397
- reader.onload = (e) => htmlInput.value = e.target.result;
398
- reader.readAsText(file);
399
- }
400
- });
401
-
402
- // Cheerio script generation
403
- async function generateCheerioScript() {
404
- const htmlContent = document.getElementById('htmlOutput').textContent;
405
- const userInput = document.getElementById('extractionQuery').value;
406
-
407
- if (!htmlContent || !userInput) {
408
- alert('Please process HTML and enter an extraction query first');
409
- return;
410
- }
411
-
412
- try {
413
- const response = await fetch(API_URL, {
414
- method: 'POST',
415
- headers: {
416
- 'accept': 'application/json',
417
- 'x-api-key': API_KEY,
418
- 'Content-Type': 'application/json',
419
- },
420
- body: JSON.stringify({
421
- html: htmlContent,
422
- user_input: userInput
423
- })
424
- });
425
-
426
- const data = await response.json();
427
-
428
- if (data.status === 'success') {
429
- currentCheerioScript = data.cheerio_script;
430
- document.getElementById('executeExtraction').disabled = false;
431
-
432
- // Show the script in the JSON view
433
- document.getElementById('jsonOutput').textContent = JSON.stringify({
434
- cheerio_script: currentCheerioScript
435
- }, null, 2);
436
- Prism.highlightAll();
437
- } else {
438
- alert('Failed to generate extraction script');
439
- }
440
- } catch (error) {
441
- alert('Error generating script: ' + error.message);
442
- }
443
- }
444
-
445
- // Execute extraction
446
- async function executeExtraction() {
447
- if (!currentCheerioScript) {
448
- alert('Please generate an extraction script first');
449
- return;
450
- }
451
-
452
- const htmlContent = document.getElementById('htmlOutput').textContent;
453
-
454
- try {
455
- const response = await fetch('/extract', {
456
- method: 'POST',
457
- headers: {
458
- 'Content-Type': 'application/json'
459
- },
460
- body: JSON.stringify({
461
- html: htmlContent,
462
- script: currentCheerioScript
463
- })
464
- });
465
-
466
- const data = await response.json();
467
-
468
- document.getElementById('extractionOutput').textContent =
469
- JSON.stringify(data, null, 2);
470
- Prism.highlightAll();
471
-
472
- // Switch to extraction view
473
- document.querySelector('[data-view="extraction"]').click();
474
- } catch (error) {
475
- alert('Error executing extraction: ' + error.message);
476
- }
477
- }
478
-
479
- // Form submission
480
- form.addEventListener('submit', async (e) => {
481
- e.preventDefault();
482
-
483
- const formData = new FormData(form);
484
-
485
- // Add checkbox states
486
- document.querySelectorAll('input[type="checkbox"]').forEach(checkbox => {
487
- formData.set(checkbox.name, checkbox.checked);
488
  });
489
-
490
- try {
491
- const response = await fetch('/process', {
492
- method: 'POST',
493
- body: formData,
494
- });
495
-
496
- const data = await response.json();
497
-
498
- if (data.error) {
499
- alert(data.error);
500
- return;
501
- }
502
-
503
- // Display operation status
504
- const statusContainer = document.querySelector('#operationStatus');
505
- const statusGrid = statusContainer.querySelector('.status-grid');
506
- statusContainer.style.display = 'block';
507
-
508
- statusGrid.innerHTML = Object.entries(data.operationStatus)
509
- .map(([operation, status]) => `
510
- <div class="status-item">
511
- <div class="status-icon ${status.success ? 'status-success' : 'status-error'}">
512
- ${status.success ? '✓' : '✗'}
513
- </div>
514
- <div>
515
- <div>${formatLabel(operation)}</div>
516
- ${status.error ? `<div class="status-message">Error: ${status.error}</div>` : ''}
517
- </div>
518
- </div>
519
- `).join('');
520
-
521
- // Display stats
522
- statsContainer.style.display = 'grid';
523
- statsContainer.innerHTML = Object.entries(data.stats)
524
- .map(([key, value]) => `
525
- <div class="stat-item">
526
- <div class="stat-label">${formatLabel(key)}</div>
527
- <div class="stat-value">${value}</div>
528
- </div>
529
- `).join('');
530
-
531
- // Show results container
532
- resultsContainer.style.display = 'block';
533
-
534
- // Update outputs with syntax highlighting
535
- document.getElementById('htmlOutput').textContent = data.result.html;
536
- document.getElementById('jsonOutput').textContent = data.result.json;
537
-
538
- // Trigger Prism highlighting
539
- Prism.highlightAll();
540
- } catch (err) {
541
- alert('Error processing HTML: ' + err.message);
542
- }
543
- });
544
-
545
- // Event listeners for extraction
546
- document.getElementById('generateScript').addEventListener('click', generateCheerioScript);
547
- document.getElementById('executeExtraction').addEventListener('click', executeExtraction);
548
-
549
- // Utility functions
550
- function formatLabel(key) {
551
- return key
552
- .replace(/([A-Z])/g, ' $1')
553
- .replace(/([a-z])([A-Z])/g, '$1 $2')
554
- .toLowerCase()
555
- .replace(/^./, str => str.toUpperCase())
556
- .replace('Html', 'HTML');
557
- }
558
-
559
- async function copyResult(type) {
560
- const content = document.getElementById(`${type}Output`).textContent;
561
- try {
562
- await navigator.clipboard.writeText(content);
563
- showCopyFeedback();
564
- } catch (err) {
565
- alert('Failed to copy to clipboard');
566
- }
567
- }
568
-
569
- function downloadResult(type) {
570
- const content = document.getElementById(`${type}Output`).textContent;
571
- const blob = new Blob([content], { type: 'text/plain' });
572
- const url = URL.createObjectURL(blob);
573
- const a = document.createElement('a');
574
- a.href = url;
575
- a.download = `compressed.${type}`;
576
- document.body.appendChild(a);
577
- a.click();
578
- document.body.removeChild(a);
579
- URL.revokeObjectURL(url);
580
- }
581
-
582
- function showCopyFeedback() {
583
- copyFeedback.style.display = 'block';
584
- setTimeout(() => {
585
- copyFeedback.style.display = 'none';
586
- }, 2000);
587
- }
588
  </script>
589
- </body>
590
  </html>
 
49
  border-radius: 8px;
50
  margin: 20px 0;
51
  }
52
+ .input-section {
53
+ margin-bottom: 30px;
54
+ padding: 20px;
55
+ border: 1px solid var(--border-color);
56
+ border-radius: 8px;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  }
58
 
59
+ .script-section {
60
+ margin: 20px 0;
61
+ padding: 20px;
62
+ background: var(--background-color);
63
+ border-radius: 8px;
 
 
 
 
64
  }
65
 
66
+ #userInput {
67
+ width: 100%;
68
+ padding: 12px;
69
+ margin-bottom: 15px;
70
+ border: 1px solid var(--border-color);
71
+ border-radius: 4px;
72
  }
73
 
74
+ #scriptOutput {
75
+ background: white;
76
+ padding: 15px;
77
+ border-radius: 4px;
78
+ margin-top: 15px;
79
  }
80
 
81
+ .extraction-results {
82
+ margin-top: 20px;
83
+ padding: 15px;
84
+ background: white;
85
+ border-radius: 4px;
86
+ border: 1px solid var(--border-color);
87
  }
88
+ </style>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  </head>
90
  <body>
91
  <div class="container">
92
+ <h1>HTML Data Extractor</h1>
93
+
94
+ <div class="input-section">
95
+ <h2>Input HTML</h2>
96
+ <textarea
97
+ id="htmlInput"
98
+ placeholder="Paste your HTML here..."
99
+ ></textarea>
100
+
101
+ <h3>Extraction Query</h3>
102
+ <input
103
+ type="text"
104
+ id="userInput"
105
+ placeholder="Describe what data you want to extract..."
106
+ >
107
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  <div class="button-group">
109
+ <button id="generateScript">Generate Script</button>
110
+ <button id="extractData" disabled>Extract Data</button>
111
  </div>
 
 
 
 
112
  </div>
113
+
114
+ <div class="script-section" style="display: none;">
115
+ <h2>Generated Cheerio Script</h2>
116
+ <pre><code class="language-javascript" id="scriptOutput"></code></pre>
 
 
 
 
 
 
 
 
117
  </div>
118
+
119
+ <div class="extraction-results" style="display: none;">
120
+ <h2>Extracted Data</h2>
 
 
 
 
 
 
 
121
  <pre><code class="language-json" id="extractionOutput"></code></pre>
 
122
  </div>
 
 
 
123
  </div>
124
+
 
 
 
125
  <script>
126
+ const generateScriptBtn = document.getElementById('generateScript');
127
+ const extractDataBtn = document.getElementById('extractData');
128
+ const htmlInput = document.getElementById('htmlInput');
129
+ const userInput = document.getElementById('userInput');
130
+ const scriptSection = document.querySelector('.script-section');
131
+ const extractionResults = document.querySelector('.extraction-results');
132
+
133
+ let currentScript = '';
134
+
135
+ generateScriptBtn.addEventListener('click', async () => {
136
+ try {
137
+ const response = await fetch('https://elevatics-ai-web-scraper-chat.hf.space/api/v1/generate-cheerio-script', {
138
+ method: 'POST',
139
+ headers: {
140
+ 'accept': 'application/json',
141
+ 'x-api-key': 'ae54a922-ed3a-4634-be4a-4e4dd470800a',
142
+ 'Content-Type': 'application/json',
143
+ },
144
+ body: JSON.stringify({
145
+ html: htmlInput.value,
146
+ user_input: userInput.value
147
+ })
148
+ });
149
+
150
+ const data = await response.json();
151
+
152
+ if (data.status === 'success') {
153
+ currentScript = data.cheerio_script;
154
+ document.getElementById('scriptOutput').textContent = currentScript;
155
+ scriptSection.style.display = 'block';
156
+ extractDataBtn.disabled = false;
157
+ Prism.highlightAll();
158
+ }
159
+ } catch (error) {
160
+ alert('Error generating script: ' + error.message);
161
+ }
162
  });
163
+
164
+ extractDataBtn.addEventListener('click', async () => {
165
+ try {
166
+ const response = await fetch('/extract', {
167
+ method: 'POST',
168
+ headers: {
169
+ 'Content-Type': 'application/json'
170
+ },
171
+ body: JSON.stringify({
172
+ html: htmlInput.value,
173
+ script: currentScript
174
+ })
175
+ });
176
+
177
+ const data = await response.json();
178
+
179
+ if (data.success) {
180
+ document.getElementById('extractionOutput').textContent =
181
+ JSON.stringify(data.data, null, 2);
182
+ extractionResults.style.display = 'block';
183
+ Prism.highlightAll();
184
+ } else {
185
+ alert('Extraction failed: ' + data.error);
186
+ }
187
+ } catch (error) {
188
+ alert('Error during extraction: ' + error.message);
189
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  </script>
192
+ </body>
193
  </html>