pvanand committed on
Commit
f7737bb
·
verified ·
1 Parent(s): 7225716

Update public/index.html

Browse files
Files changed (1) hide show
  1. public/index.html +555 -319
public/index.html CHANGED
@@ -1,354 +1,590 @@
1
- <body>
2
- <div class="container">
3
- <h1>HTML Compressor for LLM</h1>
4
- <p>Compress HTML content for optimal LLM processing while preserving essential structure.</p>
5
-
6
- <form id="compressorForm">
7
- <textarea
8
- name="html"
9
- id="htmlInput"
10
- placeholder="Paste your HTML here or upload a file..."
11
- ></textarea>
12
-
13
- <div class="options-container">
14
- <h3>Compression Options</h3>
15
- <div class="option-grid">
16
- <div class="option-item">
17
- <input type="checkbox" id="cleanHead" name="cleanHead" checked>
18
- <label for="cleanHead">Clean head section</label>
19
- </div>
20
- <div class="option-item">
21
- <input type="checkbox" id="removeScripts" name="removeScripts" checked>
22
- <label for="removeScripts">Remove scripts</label>
23
- </div>
24
- <div class="option-item">
25
- <input type="checkbox" id="removeStyles" name="removeStyles" checked>
26
- <label for="removeStyles">Remove styles</label>
27
- </div>
28
- <div class="option-item">
29
- <input type="checkbox" id="handleRepeatingElements" name="handleRepeatingElements" checked>
30
- <label for="handleRepeatingElements">Handle repeating elements</label>
31
- </div>
32
- <div class="option-item">
33
- <input type="checkbox" id="truncateText" name="truncateText" checked>
34
- <label for="truncateText">Truncate text</label>
35
- </div>
36
- <div class="option-item">
37
- <label for="truncateLength">Max text length:</label>
38
- <input type="number" id="truncateLength" name="truncateLength" value="100" min="10" max="1000">
39
- </div>
40
- <div class="option-item">
41
- <input type="checkbox" id="minifyHtml" name="minifyHtml" checked>
42
- <label for="minifyHtml">Minify HTML</label>
43
- </div>
44
- <div class="option-item">
45
- <input type="checkbox" id="removeMedia" name="removeMedia" checked>
46
- <label for="removeMedia">Remove media</label>
47
- </div>
48
- </div>
49
- </div>
50
-
51
- <div class="extraction-container">
52
- <h3>Data Extraction</h3>
53
- <textarea
54
- id="extractionQuery"
55
- placeholder="Enter your extraction query (e.g., 'extract product title and price')"
56
- ></textarea>
57
- <div class="button-group">
58
- <button type="button" id="generateScript">Generate Extraction Script</button>
59
- <button type="button" id="executeExtraction" disabled>Execute Extraction</button>
60
- </div>
61
- </div>
62
-
63
- <div class="button-group">
64
- <input type="file" accept=".html,.htm" id="fileInput">
65
- <button type="submit">Process HTML</button>
66
- </div>
67
- </form>
68
-
69
- <div id="operationStatus" class="operation-status" style="display: none;">
70
- <h3>Operation Status</h3>
71
- <div class="status-grid"></div>
72
- </div>
73
 
74
- <div id="stats" class="stats-grid" style="display: none;"></div>
 
 
 
 
 
 
75
 
76
- <div class="results-container" style="display: none;">
77
- <div class="results-tabs">
78
- <div class="tab active" data-view="html">Compressed HTML</div>
79
- <div class="tab" data-view="json">JSON Structure</div>
80
- <div class="tab" data-view="extraction">Extraction Results</div>
81
- </div>
 
 
82
 
83
- <div class="result-panel" id="htmlView">
84
- <div class="result-header">
85
- <h3>HTML Output</h3>
86
- <div class="button-group">
87
- <button onclick="copyResult('html')">Copy</button>
88
- <button onclick="downloadResult('html')">Download</button>
89
- </div>
90
- </div>
91
- <div class="result-content">
92
- <pre><code class="language-html" id="htmlOutput"></code></pre>
93
- </div>
94
- </div>
95
 
96
- <div class="result-panel" id="jsonView" style="display: none;">
97
- <div class="result-header">
98
- <h3>JSON Structure</h3>
99
- <div class="button-group">
100
- <button onclick="copyResult('json')">Copy</button>
101
- <button onclick="downloadResult('json')">Download</button>
102
- </div>
103
- </div>
104
- <div class="result-content">
105
- <pre><code class="language-json" id="jsonOutput"></code></pre>
106
- </div>
107
- </div>
108
 
109
- <div class="result-panel" id="extractionView" style="display: none;">
110
- <div class="result-header">
111
- <h3>Extraction Results</h3>
112
- <div class="button-group">
113
- <button onclick="copyResult('extraction')">Copy</button>
114
- <button onclick="downloadResult('extraction')">Download</button>
115
- </div>
116
- </div>
117
- <div class="result-content">
118
- <pre><code class="language-json" id="extractionOutput"></code></pre>
119
- </div>
120
- </div>
121
- </div>
122
 
123
- <div class="copy-feedback">Copied to clipboard!</div>
124
- </div>
 
 
 
125
 
126
- <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/prism.min.js"></script>
127
- <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-markup.min.js"></script>
128
- <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-json.min.js"></script>
129
- <script>
130
- const API_URL = 'https://elevatics-ai-web-scraper-chat.hf.space/api/v1/generate-cheerio-script';
131
- const API_KEY = 'ae54a922-ed3a-4634-be4a-4e4dd470800a';
132
 
133
- let currentCheerioScript = null;
 
 
 
 
 
 
 
 
134
 
135
- const form = document.getElementById('compressorForm');
136
- const fileInput = document.getElementById('fileInput');
137
- const htmlInput = document.getElementById('htmlInput');
138
- const resultsContainer = document.querySelector('.results-container');
139
- const statsContainer = document.getElementById('stats');
140
- const copyFeedback = document.querySelector('.copy-feedback');
141
 
142
- // Tab switching
143
- document.querySelectorAll('.tab').forEach(tab => {
144
- tab.addEventListener('click', () => {
145
- // Update tabs
146
- document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
147
- tab.classList.add('active');
148
-
149
- // Update views
150
- const view = tab.dataset.view;
151
- document.getElementById('htmlView').style.display = view === 'html' ? 'block' : 'none';
152
- document.getElementById('jsonView').style.display = view === 'json' ? 'block' : 'none';
153
- document.getElementById('extractionView').style.display = view === 'extraction' ? 'block' : 'none';
154
- });
155
- });
156
 
157
- // File input handler
158
- fileInput.addEventListener('change', (e) => {
159
- const file = e.target.files[0];
160
- if (file) {
161
- const reader = new FileReader();
162
- reader.onload = (e) => htmlInput.value = e.target.result;
163
- reader.readAsText(file);
164
- }
165
- });
166
 
167
- // Cheerio script generation
168
- async function generateCheerioScript() {
169
- const htmlContent = document.getElementById('htmlOutput').textContent;
170
- const userInput = document.getElementById('extractionQuery').value;
 
 
 
171
 
172
- if (!htmlContent || !userInput) {
173
- alert('Please process HTML and enter an extraction query first');
174
- return;
175
- }
176
 
177
- try {
178
- const response = await fetch(API_URL, {
179
- method: 'POST',
180
- headers: {
181
- 'accept': 'application/json',
182
- 'x-api-key': API_KEY,
183
- 'Content-Type': 'application/json',
184
- },
185
- body: JSON.stringify({
186
- html: htmlContent,
187
- user_input: userInput
188
- })
189
- });
190
 
191
- const data = await response.json();
192
-
193
- if (data.status === 'success') {
194
- currentCheerioScript = data.cheerio_script;
195
- document.getElementById('executeExtraction').disabled = false;
196
-
197
- // Show the script in the JSON view
198
- document.getElementById('jsonOutput').textContent = JSON.stringify({
199
- cheerio_script: currentCheerioScript
200
- }, null, 2);
201
- Prism.highlightAll();
202
- } else {
203
- alert('Failed to generate extraction script');
204
- }
205
- } catch (error) {
206
- alert('Error generating script: ' + error.message);
207
- }
208
  }
209
 
210
- // Execute extraction
211
- async function executeExtraction() {
212
- if (!currentCheerioScript) {
213
- alert('Please generate an extraction script first');
214
- return;
215
- }
216
 
217
- const htmlContent = document.getElementById('htmlOutput').textContent;
 
 
 
 
 
218
 
219
- try {
220
- const response = await fetch('/extract', {
221
- method: 'POST',
222
- headers: {
223
- 'Content-Type': 'application/json'
224
- },
225
- body: JSON.stringify({
226
- html: htmlContent,
227
- script: currentCheerioScript
228
- })
229
- });
230
 
231
- const data = await response.json();
232
-
233
- document.getElementById('extractionOutput').textContent =
234
- JSON.stringify(data, null, 2);
235
- Prism.highlightAll();
236
-
237
- // Switch to extraction view
238
- document.querySelector('[data-view="extraction"]').click();
239
- } catch (error) {
240
- alert('Error executing extraction: ' + error.message);
241
- }
242
  }
243
 
244
- // Form submission
245
- form.addEventListener('submit', async (e) => {
246
- e.preventDefault();
247
-
248
- const formData = new FormData(form);
249
-
250
- // Add checkbox states
251
- document.querySelectorAll('input[type="checkbox"]').forEach(checkbox => {
252
- formData.set(checkbox.name, checkbox.checked);
253
- });
254
-
255
- try {
256
- const response = await fetch('/process', {
257
- method: 'POST',
258
- body: formData,
259
- });
260
-
261
- const data = await response.json();
262
-
263
- if (data.error) {
264
- alert(data.error);
265
- return;
266
- }
267
 
268
- // Display operation status
269
- const statusContainer = document.querySelector('#operationStatus');
270
- const statusGrid = statusContainer.querySelector('.status-grid');
271
- statusContainer.style.display = 'block';
272
-
273
- statusGrid.innerHTML = Object.entries(data.operationStatus)
274
- .map(([operation, status]) => `
275
- <div class="status-item">
276
- <div class="status-icon ${status.success ? 'status-success' : 'status-error'}">
277
- ${status.success ? '✓' : '✗'}
278
- </div>
279
- <div>
280
- <div>${formatLabel(operation)}</div>
281
- ${status.error ? `<div class="status-message">Error: ${status.error}</div>` : ''}
282
- </div>
283
- </div>
284
- `).join('');
285
 
286
- // Display stats
287
- statsContainer.style.display = 'grid';
288
- statsContainer.innerHTML = Object.entries(data.stats)
289
- .map(([key, value]) => `
290
- <div class="stat-item">
291
- <div class="stat-label">${formatLabel(key)}</div>
292
- <div class="stat-value">${value}</div>
293
- </div>
294
- `).join('');
295
 
296
- // Show results container
297
- resultsContainer.style.display = 'block';
 
 
 
 
 
 
298
 
299
- // Update outputs with syntax highlighting
300
- document.getElementById('htmlOutput').textContent = data.result.html;
301
- document.getElementById('jsonOutput').textContent = data.result.json;
302
-
303
- // Trigger Prism highlighting
304
- Prism.highlightAll();
305
- } catch (err) {
306
- alert('Error processing HTML: ' + err.message);
307
- }
308
- });
309
 
310
- // Event listeners for extraction
311
- document.getElementById('generateScript').addEventListener('click', generateCheerioScript);
312
- document.getElementById('executeExtraction').addEventListener('click', executeExtraction);
313
 
314
- // Utility functions
315
- function formatLabel(key) {
316
- return key
317
- .replace(/([A-Z])/g, ' $1')
318
- .replace(/([a-z])([A-Z])/g, '$1 $2')
319
- .toLowerCase()
320
- .replace(/^./, str => str.toUpperCase())
321
- .replace('Html', 'HTML');
322
  }
323
 
324
- async function copyResult(type) {
325
- const content = document.getElementById(`${type}Output`).textContent;
326
- try {
327
- await navigator.clipboard.writeText(content);
328
- showCopyFeedback();
329
- } catch (err) {
330
- alert('Failed to copy to clipboard');
331
- }
332
  }
333
 
334
- function downloadResult(type) {
335
- const content = document.getElementById(`${type}Output`).textContent;
336
- const blob = new Blob([content], { type: 'text/plain' });
337
- const url = URL.createObjectURL(blob);
338
- const a = document.createElement('a');
339
- a.href = url;
340
- a.download = `compressed.${type}`;
341
- document.body.appendChild(a);
342
- a.click();
343
- document.body.removeChild(a);
344
- URL.revokeObjectURL(url);
345
  }
346
 
347
- function showCopyFeedback() {
348
- copyFeedback.style.display = 'block';
349
- setTimeout(() => {
350
- copyFeedback.style.display = 'none';
351
- }, 2000);
352
  }
353
- </script>
354
- </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta charset="utf-8"/>
5
+ <title>HTML Compressor for LLM</title>
6
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/themes/prism.min.css">
7
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/plugins/toolbar/prism-toolbar.min.css">
8
+ <style>
9
+ :root {
10
+ --primary-color: #007bff;
11
+ --secondary-color: #6c757d;
12
+ --success-color: #28a745;
13
+ --border-color: #dee2e6;
14
+ --background-color: #f8f9fa;
15
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ body {
18
+ font-family: system-ui, -apple-system, sans-serif;
19
+ line-height: 1.6;
20
+ margin: 0;
21
+ padding: 20px;
22
+ background: var(--background-color);
23
+ }
24
 
25
+ .container {
26
+ max-width: 1200px;
27
+ margin: 0 auto;
28
+ background: white;
29
+ padding: 30px;
30
+ border-radius: 8px;
31
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
32
+ }
33
 
34
+ textarea {
35
+ width: 100%;
36
+ height: 200px;
37
+ padding: 12px;
38
+ border: 1px solid var(--border-color);
39
+ border-radius: 4px;
40
+ font-family: 'Monaco', 'Menlo', monospace;
41
+ font-size: 14px;
42
+ resize: vertical;
43
+ margin-bottom: 15px;
44
+ }
 
45
 
46
+ .options-container {
47
+ background: var(--background-color);
48
+ padding: 20px;
49
+ border-radius: 8px;
50
+ margin: 20px 0;
51
+ }
 
 
 
 
 
 
52
 
53
+ .option-grid {
54
+ display: grid;
55
+ grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
56
+ gap: 15px;
57
+ }
 
 
 
 
 
 
 
 
58
 
59
+ .option-item {
60
+ display: flex;
61
+ align-items: center;
62
+ gap: 10px;
63
+ }
64
 
65
+ .button-group {
66
+ display: flex;
67
+ gap: 10px;
68
+ margin: 15px 0;
69
+ }
 
70
 
71
+ button {
72
+ background: var(--primary-color);
73
+ color: white;
74
+ padding: 8px 16px;
75
+ border: none;
76
+ border-radius: 4px;
77
+ cursor: pointer;
78
+ transition: background 0.2s;
79
+ }
80
 
81
+ button:hover {
82
+ background: #0056b3;
83
+ }
 
 
 
84
 
85
+ .results-container {
86
+ margin-top: 30px;
87
+ }
 
 
 
 
 
 
 
 
 
 
 
88
 
89
+ .results-tabs {
90
+ display: flex;
91
+ gap: 10px;
92
+ margin-bottom: 15px;
93
+ }
 
 
 
 
94
 
95
+ .tab {
96
+ padding: 8px 16px;
97
+ cursor: pointer;
98
+ border: 1px solid var(--border-color);
99
+ border-radius: 4px;
100
+ transition: all 0.2s;
101
+ }
102
 
103
+ .tab.active {
104
+ background: var(--primary-color);
105
+ color: white;
106
+ }
107
 
108
+ .result-panel {
109
+ border: 1px solid var(--border-color);
110
+ border-radius: 4px;
111
+ overflow: hidden;
112
+ }
 
 
 
 
 
 
 
 
113
 
114
+ .result-header {
115
+ display: flex;
116
+ justify-content: space-between;
117
+ align-items: center;
118
+ padding: 10px;
119
+ background: var(--background-color);
120
+ border-bottom: 1px solid var(--border-color);
 
 
 
 
 
 
 
 
 
 
121
  }
122
 
123
+ .result-content {
124
+ padding: 15px;
125
+ overflow: auto;
126
+ max-height: 500px;
127
+ }
 
128
 
129
+ .stats-grid {
130
+ display: grid;
131
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
132
+ gap: 15px;
133
+ margin: 20px 0;
134
+ }
135
 
136
+ .stat-item {
137
+ background: white;
138
+ padding: 15px;
139
+ border-radius: 4px;
140
+ border: 1px solid var(--border-color);
141
+ }
 
 
 
 
 
142
 
143
+ .stat-value {
144
+ font-size: 1.2em;
145
+ font-weight: bold;
146
+ color: var(--primary-color);
 
 
 
 
 
 
 
147
  }
148
 
149
+ .copy-feedback {
150
+ position: fixed;
151
+ bottom: 20px;
152
+ right: 20px;
153
+ background: var(--success-color);
154
+ color: white;
155
+ padding: 10px 20px;
156
+ border-radius: 4px;
157
+ display: none;
158
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
+ .operation-status {
161
+ margin: 20px 0;
162
+ padding: 15px;
163
+ border: 1px solid var(--border-color);
164
+ border-radius: 4px;
165
+ }
 
 
 
 
 
 
 
 
 
 
 
166
 
167
+ .status-grid {
168
+ display: grid;
169
+ grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
170
+ gap: 10px;
171
+ margin-top: 10px;
172
+ }
 
 
 
173
 
174
+ .status-item {
175
+ display: flex;
176
+ align-items: center;
177
+ gap: 8px;
178
+ padding: 8px;
179
+ border-radius: 4px;
180
+ background: var(--background-color);
181
+ }
182
 
183
+ .status-icon {
184
+ width: 20px;
185
+ height: 20px;
186
+ border-radius: 50%;
187
+ display: flex;
188
+ align-items: center;
189
+ justify-content: center;
190
+ color: white;
191
+ font-size: 12px;
192
+ }
193
 
194
+ .status-success {
195
+ background: var(--success-color);
196
+ }
197
 
198
+ .status-error {
199
+ background: #dc3545;
 
 
 
 
 
 
200
  }
201
 
202
+ .status-message {
203
+ font-size: 0.9em;
204
+ color: #666;
205
+ margin-top: 4px;
 
 
 
 
206
  }
207
 
208
+ pre {
209
+ margin: 0;
210
+ border-radius: 4px;
 
 
 
 
 
 
 
 
211
  }
212
 
213
+ code {
214
+ font-family: 'Monaco', 'Menlo', monospace;
215
+ font-size: 14px;
 
 
216
  }
217
+ /* Data-extraction panel styles */
218
+ .extraction-container {
219
+ margin: 20px 0;
220
+ padding: 20px;
221
+ background: var(--background-color);
222
+ border-radius: 8px;
223
+ }
224
+
225
+ .extraction-container textarea {
226
+ height: 100px;
227
+ margin-bottom: 10px;
228
+ }
229
+
230
+ #executeExtraction:disabled {
231
+ background: var(--secondary-color);
232
+ cursor: not-allowed;
233
+ }
234
+ </style>
235
+ </head>
236
+ <body>
237
+ <div class="container">
238
+ <h1>HTML Compressor for LLM</h1>
239
+ <p>Compress HTML content for optimal LLM processing while preserving essential structure.</p>
240
+
241
+ <form id="compressorForm">
242
+ <textarea
243
+ name="html"
244
+ id="htmlInput"
245
+ placeholder="Paste your HTML here or upload a file..."
246
+ ></textarea>
247
+
248
+ <div class="options-container">
249
+ <h3>Compression Options</h3>
250
+ <div class="option-grid">
251
+ <div class="option-item">
252
+ <input type="checkbox" id="cleanHead" name="cleanHead" checked>
253
+ <label for="cleanHead">Clean head section</label>
254
+ </div>
255
+ <div class="option-item">
256
+ <input type="checkbox" id="removeScripts" name="removeScripts" checked>
257
+ <label for="removeScripts">Remove scripts</label>
258
+ </div>
259
+ <div class="option-item">
260
+ <input type="checkbox" id="removeStyles" name="removeStyles" checked>
261
+ <label for="removeStyles">Remove styles</label>
262
+ </div>
263
+ <div class="option-item">
264
+ <input type="checkbox" id="handleRepeatingElements" name="handleRepeatingElements" checked>
265
+ <label for="handleRepeatingElements">Handle repeating elements</label>
266
+ </div>
267
+ <div class="option-item">
268
+ <input type="checkbox" id="truncateText" name="truncateText" checked>
269
+ <label for="truncateText">Truncate text</label>
270
+ </div>
271
+ <div class="option-item">
272
+ <label for="truncateLength">Max text length:</label>
273
+ <input type="number" id="truncateLength" name="truncateLength" value="100" min="10" max="1000">
274
+ </div>
275
+ <div class="option-item">
276
+ <input type="checkbox" id="minifyHtml" name="minifyHtml" checked>
277
+ <label for="minifyHtml">Minify HTML</label>
278
+ </div>
279
+ <div class="option-item">
280
+ <input type="checkbox" id="removeMedia" name="removeMedia" checked>
281
+ <label for="removeMedia">Remove media</label>
282
+ </div>
283
+ </div>
284
+ </div>
285
+
286
+ <div class="extraction-container">
287
+ <h3>Data Extraction</h3>
288
+ <textarea
289
+ id="extractionQuery"
290
+ placeholder="Enter your extraction query (e.g., 'extract product title and price')"
291
+ ></textarea>
292
+ <div class="button-group">
293
+ <button type="button" id="generateScript">Generate Extraction Script</button>
294
+ <button type="button" id="executeExtraction" disabled>Execute Extraction</button>
295
+ </div>
296
+ </div>
297
+
298
+ <div class="button-group">
299
+ <input type="file" accept=".html,.htm" id="fileInput">
300
+ <button type="submit">Process HTML</button>
301
+ </div>
302
+ </form>
303
+
304
+ <div id="operationStatus" class="operation-status" style="display: none;">
305
+ <h3>Operation Status</h3>
306
+ <div class="status-grid"></div>
307
+ </div>
308
+
309
+ <div id="stats" class="stats-grid" style="display: none;"></div>
310
+
311
+ <div class="results-container" style="display: none;">
312
+ <div class="results-tabs">
313
+ <div class="tab active" data-view="html">Compressed HTML</div>
314
+ <div class="tab" data-view="json">JSON Structure</div>
315
+ <div class="tab" data-view="extraction">Extraction Results</div>
316
+ </div>
317
+
318
+ <div class="result-panel" id="htmlView">
319
+ <div class="result-header">
320
+ <h3>HTML Output</h3>
321
+ <div class="button-group">
322
+ <button onclick="copyResult('html')">Copy</button>
323
+ <button onclick="downloadResult('html')">Download</button>
324
+ </div>
325
+ </div>
326
+ <div class="result-content">
327
+ <pre><code class="language-html" id="htmlOutput"></code></pre>
328
+ </div>
329
+ </div>
330
+
331
+ <div class="result-panel" id="jsonView" style="display: none;">
332
+ <div class="result-header">
333
+ <h3>JSON Structure</h3>
334
+ <div class="button-group">
335
+ <button onclick="copyResult('json')">Copy</button>
336
+ <button onclick="downloadResult('json')">Download</button>
337
+ </div>
338
+ </div>
339
+ <div class="result-content">
340
+ <pre><code class="language-json" id="jsonOutput"></code></pre>
341
+ </div>
342
+ </div>
343
+
344
+ <div class="result-panel" id="extractionView" style="display: none;">
345
+ <div class="result-header">
346
+ <h3>Extraction Results</h3>
347
+ <div class="button-group">
348
+ <button onclick="copyResult('extraction')">Copy</button>
349
+ <button onclick="downloadResult('extraction')">Download</button>
350
+ </div>
351
+ </div>
352
+ <div class="result-content">
353
+ <pre><code class="language-json" id="extractionOutput"></code></pre>
354
+ </div>
355
+ </div>
356
+ </div>
357
+
358
+ <div class="copy-feedback">Copied to clipboard!</div>
359
+ </div>
360
+
361
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/prism.min.js"></script>
362
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-markup.min.js"></script>
363
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-json.min.js"></script>
364
+ <script>
365
+ const API_URL = 'https://elevatics-ai-web-scraper-chat.hf.space/api/v1/generate-cheerio-script';
366
+ const API_KEY = 'ae54a922-ed3a-4634-be4a-4e4dd470800a';
367
+
368
+ let currentCheerioScript = null;
369
+
370
+ const form = document.getElementById('compressorForm');
371
+ const fileInput = document.getElementById('fileInput');
372
+ const htmlInput = document.getElementById('htmlInput');
373
+ const resultsContainer = document.querySelector('.results-container');
374
+ const statsContainer = document.getElementById('stats');
375
+ const copyFeedback = document.querySelector('.copy-feedback');
376
+
377
+ // Tab switching
378
// Tab switching: clicking a tab marks it active and shows only the panel
// whose id is "<view>View" for that tab's data-view value.
document.querySelectorAll('.tab').forEach((clickedTab) => {
  clickedTab.addEventListener('click', () => {
    // Clear the active marker everywhere, then set it on the clicked tab.
    for (const anyTab of document.querySelectorAll('.tab')) {
      anyTab.classList.remove('active');
    }
    clickedTab.classList.add('active');

    // Exactly one of the three result panels is visible at a time.
    const selectedView = clickedTab.dataset.view;
    for (const viewName of ['html', 'json', 'extraction']) {
      const panel = document.getElementById(viewName + 'View');
      panel.style.display = viewName === selectedView ? 'block' : 'none';
    }
  });
});
391
+
392
+ // File input handler
393
// File upload: read the chosen file as text and place it in the HTML textarea.
fileInput.addEventListener('change', (changeEvent) => {
  const [chosenFile] = changeEvent.target.files;
  if (!chosenFile) {
    return; // selection was cleared, nothing to load
  }

  const textReader = new FileReader();
  textReader.onload = (loadEvent) => {
    htmlInput.value = loadEvent.target.result;
  };
  textReader.readAsText(chosenFile);
});
401
+
402
+ // Cheerio script generation
403
/**
 * Request a Cheerio extraction script for the currently compressed HTML.
 *
 * Reads the compressed HTML from #htmlOutput and the user's query from
 * #extractionQuery, POSTs both to API_URL, and on success stores the script
 * in `currentCheerioScript`, enables the "Execute Extraction" button and
 * shows the script in the JSON panel. Failures are reported with alert().
 *
 * NOTE(review): API_KEY is embedded in this page and therefore visible to
 * every visitor; consider proxying this call through the backend instead.
 *
 * @returns {Promise<void>}
 */
async function generateCheerioScript() {
  const htmlContent = document.getElementById('htmlOutput').textContent;
  // Trim so a whitespace-only query is treated as "no query".
  const userInput = document.getElementById('extractionQuery').value.trim();

  if (!htmlContent || !userInput) {
    alert('Please process HTML and enter an extraction query first');
    return;
  }

  try {
    const response = await fetch(API_URL, {
      method: 'POST',
      headers: {
        'accept': 'application/json',
        'x-api-key': API_KEY,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        html: htmlContent,
        user_input: userInput
      })
    });

    // fetch() only rejects on network failure; HTTP errors must be checked
    // explicitly before the body is trusted.
    if (!response.ok) {
      throw new Error('Request failed with status ' + response.status);
    }

    const data = await response.json();

    // Require an actual script, not just a "success" flag, before enabling
    // execution.
    if (data.status === 'success' && data.cheerio_script) {
      currentCheerioScript = data.cheerio_script;
      document.getElementById('executeExtraction').disabled = false;

      // Show the script in the JSON view
      document.getElementById('jsonOutput').textContent = JSON.stringify({
        cheerio_script: currentCheerioScript
      }, null, 2);
      Prism.highlightAll();
    } else {
      alert('Failed to generate extraction script');
    }
  } catch (error) {
    alert('Error generating script: ' + error.message);
  }
}
444
+
445
+ // Execute extraction
446
/**
 * Run the previously generated Cheerio script against the compressed HTML.
 *
 * POSTs the text of #htmlOutput and `currentCheerioScript` to `/extract`,
 * pretty-prints the JSON response into #extractionOutput and switches to the
 * "Extraction Results" tab. Failures are reported with alert().
 *
 * @returns {Promise<void>}
 */
async function executeExtraction() {
  if (!currentCheerioScript) {
    alert('Please generate an extraction script first');
    return;
  }

  const htmlContent = document.getElementById('htmlOutput').textContent;

  try {
    const response = await fetch('/extract', {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({
        html: htmlContent,
        script: currentCheerioScript
      })
    });

    const data = await response.json();

    // An HTTP error body is not an extraction result; surface it as an error
    // instead of rendering it in the results panel.
    if (!response.ok) {
      const reason = data && data.error
        ? data.error
        : 'Request failed with status ' + response.status;
      throw new Error(reason);
    }

    document.getElementById('extractionOutput').textContent =
      JSON.stringify(data, null, 2);
    Prism.highlightAll();

    // Switch to extraction view
    document.querySelector('[data-view="extraction"]').click();
  } catch (error) {
    alert('Error executing extraction: ' + error.message);
  }
}
478
+
479
+ // Form submission
480
// Form submission: send the HTML and option flags to /process, then render
// the per-operation status, the statistics and the compressed outputs.
form.addEventListener('submit', async (e) => {
  e.preventDefault();

  // Everything interpolated into innerHTML below originates from the server
  // response (which in turn derives from user-supplied HTML), so it is
  // escaped before being inserted as markup.
  const HTML_ESCAPES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  const escapeHtml = (value) =>
    String(value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);

  const formData = new FormData(form);

  // FormData omits unchecked checkboxes; send every option explicitly as
  // "true"/"false" so the server always receives all flags.
  form.querySelectorAll('input[type="checkbox"]').forEach((checkbox) => {
    formData.set(checkbox.name, checkbox.checked);
  });

  try {
    const response = await fetch('/process', {
      method: 'POST',
      body: formData,
    });

    const data = await response.json();

    if (data.error) {
      alert(data.error);
      return;
    }
    // fetch() does not reject on HTTP errors; check the status explicitly.
    if (!response.ok) {
      throw new Error('Request failed with status ' + response.status);
    }

    // Display operation status
    const statusContainer = document.querySelector('#operationStatus');
    const statusGrid = statusContainer.querySelector('.status-grid');
    statusContainer.style.display = 'block';

    statusGrid.innerHTML = Object.entries(data.operationStatus || {})
      .map(([operation, status]) => `
        <div class="status-item">
          <div class="status-icon ${status.success ? 'status-success' : 'status-error'}">
            ${status.success ? '✓' : '✗'}
          </div>
          <div>
            <div>${escapeHtml(formatLabel(operation))}</div>
            ${status.error ? `<div class="status-message">Error: ${escapeHtml(status.error)}</div>` : ''}
          </div>
        </div>
      `).join('');

    // Display stats
    statsContainer.style.display = 'grid';
    statsContainer.innerHTML = Object.entries(data.stats || {})
      .map(([key, value]) => `
        <div class="stat-item">
          <div class="stat-label">${escapeHtml(formatLabel(key))}</div>
          <div class="stat-value">${escapeHtml(value)}</div>
        </div>
      `).join('');

    // Show results container
    resultsContainer.style.display = 'block';

    // Update outputs; textContent keeps the markup inert until Prism
    // tokenises it.
    const result = data.result || {};
    document.getElementById('htmlOutput').textContent = result.html || '';
    document.getElementById('jsonOutput').textContent = result.json || '';

    // Trigger Prism highlighting
    Prism.highlightAll();
  } catch (err) {
    alert('Error processing HTML: ' + err.message);
  }
});
544
+
545
+ // Event listeners for extraction
546
+ document.getElementById('generateScript').addEventListener('click', generateCheerioScript);
547
+ document.getElementById('executeExtraction').addEventListener('click', executeExtraction);
548
+
549
+ // Utility functions
550
/**
 * Turn a camelCase / PascalCase key into a sentence-case label.
 *
 * Examples: "cleanHead" -> "Clean head", "minifyHtml" -> "Minify HTML",
 * "OriginalSize" -> "Original size".
 *
 * @param {string} key identifier such as an option or statistic name
 * @returns {string} human-readable label with "html" always rendered as "HTML"
 */
function formatLabel(key) {
  return String(key)
    // Split at lower/digit -> upper boundaries: "minifyHtml" -> "minify Html".
    .replace(/([a-z\d])([A-Z])/g, '$1 $2')
    // Split an acronym from a following word: "HTMLSize" -> "HTML Size".
    .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2')
    .trim()
    .toLowerCase()
    .replace(/^./, (first) => first.toUpperCase())
    // Restore the acronym wherever it appears, not only at the start.
    .replace(/\bhtml\b/gi, 'HTML');
}
558
+
559
/**
 * Copy the text of one result panel to the clipboard.
 *
 * @param {string} type panel prefix; the text is read from the element whose
 *   id is `${type}Output`
 * @returns {Promise<void>}
 */
async function copyResult(type) {
  const outputNode = document.getElementById(type + 'Output');
  const textToCopy = outputNode.textContent;

  try {
    await navigator.clipboard.writeText(textToCopy);
    showCopyFeedback();
  } catch (copyError) {
    alert('Failed to copy to clipboard');
  }
}
568
+
569
/**
 * Download the text of one result panel as a file.
 *
 * The HTML panel is saved as `compressed.html`; the JSON and extraction
 * panels both contain JSON and are saved with a `.json` extension. Unknown
 * panel names fall back to `compressed.<type>` as plain text.
 *
 * @param {string} type panel prefix; the text is read from the element whose
 *   id is `${type}Output`
 */
function downloadResult(type) {
  const content = document.getElementById(`${type}Output`).textContent;

  const knownFormats = new Map([
    ['html', { fileName: 'compressed.html', mime: 'text/html' }],
    ['json', { fileName: 'compressed.json', mime: 'application/json' }],
    ['extraction', { fileName: 'extraction.json', mime: 'application/json' }],
  ]);
  const format = knownFormats.get(type) ||
    { fileName: `compressed.${type}`, mime: 'text/plain' };

  const blob = new Blob([content], { type: `${format.mime};charset=utf-8` });
  const url = URL.createObjectURL(blob);
  const a = document.createElement('a');
  a.href = url;
  a.download = format.fileName;
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);
  // Revoke on the next task so the browser has started the download before
  // the object URL becomes invalid.
  setTimeout(() => URL.revokeObjectURL(url), 0);
}
581
+
582
// Handle of the pending "hide toast" timer, or null when none is scheduled.
let copyFeedbackTimer = null;

/**
 * Show the "Copied to clipboard!" toast for two seconds.
 *
 * Calling this again while the toast is visible restarts the two-second
 * window, so an earlier timer cannot hide the toast prematurely.
 */
function showCopyFeedback() {
  if (copyFeedbackTimer !== null) {
    clearTimeout(copyFeedbackTimer);
  }
  copyFeedback.style.display = 'block';
  copyFeedbackTimer = setTimeout(() => {
    copyFeedback.style.display = 'none';
    copyFeedbackTimer = null;
  }, 2000);
}
588
+ </script>
589
+ </body>
590
+ </html>