Update public/index.html
Browse files- public/index.html +118 -515
public/index.html
CHANGED
@@ -49,542 +49,145 @@
|
|
49 |
border-radius: 8px;
|
50 |
margin: 20px 0;
|
51 |
}
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
}
|
58 |
-
|
59 |
-
.option-item {
|
60 |
-
display: flex;
|
61 |
-
align-items: center;
|
62 |
-
gap: 10px;
|
63 |
-
}
|
64 |
-
|
65 |
-
.button-group {
|
66 |
-
display: flex;
|
67 |
-
gap: 10px;
|
68 |
-
margin: 15px 0;
|
69 |
-
}
|
70 |
-
|
71 |
-
button {
|
72 |
-
background: var(--primary-color);
|
73 |
-
color: white;
|
74 |
-
padding: 8px 16px;
|
75 |
-
border: none;
|
76 |
-
border-radius: 4px;
|
77 |
-
cursor: pointer;
|
78 |
-
transition: background 0.2s;
|
79 |
-
}
|
80 |
-
|
81 |
-
button:hover {
|
82 |
-
background: #0056b3;
|
83 |
-
}
|
84 |
-
|
85 |
-
.results-container {
|
86 |
-
margin-top: 30px;
|
87 |
-
}
|
88 |
-
|
89 |
-
.results-tabs {
|
90 |
-
display: flex;
|
91 |
-
gap: 10px;
|
92 |
-
margin-bottom: 15px;
|
93 |
-
}
|
94 |
-
|
95 |
-
.tab {
|
96 |
-
padding: 8px 16px;
|
97 |
-
cursor: pointer;
|
98 |
-
border: 1px solid var(--border-color);
|
99 |
-
border-radius: 4px;
|
100 |
-
transition: all 0.2s;
|
101 |
-
}
|
102 |
-
|
103 |
-
.tab.active {
|
104 |
-
background: var(--primary-color);
|
105 |
-
color: white;
|
106 |
-
}
|
107 |
-
|
108 |
-
.result-panel {
|
109 |
-
border: 1px solid var(--border-color);
|
110 |
-
border-radius: 4px;
|
111 |
-
overflow: hidden;
|
112 |
-
}
|
113 |
-
|
114 |
-
.result-header {
|
115 |
-
display: flex;
|
116 |
-
justify-content: space-between;
|
117 |
-
align-items: center;
|
118 |
-
padding: 10px;
|
119 |
-
background: var(--background-color);
|
120 |
-
border-bottom: 1px solid var(--border-color);
|
121 |
-
}
|
122 |
-
|
123 |
-
.result-content {
|
124 |
-
padding: 15px;
|
125 |
-
overflow: auto;
|
126 |
-
max-height: 500px;
|
127 |
-
}
|
128 |
-
|
129 |
-
.stats-grid {
|
130 |
-
display: grid;
|
131 |
-
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
132 |
-
gap: 15px;
|
133 |
-
margin: 20px 0;
|
134 |
-
}
|
135 |
-
|
136 |
-
.stat-item {
|
137 |
-
background: white;
|
138 |
-
padding: 15px;
|
139 |
-
border-radius: 4px;
|
140 |
-
border: 1px solid var(--border-color);
|
141 |
-
}
|
142 |
-
|
143 |
-
.stat-value {
|
144 |
-
font-size: 1.2em;
|
145 |
-
font-weight: bold;
|
146 |
-
color: var(--primary-color);
|
147 |
-
}
|
148 |
-
|
149 |
-
.copy-feedback {
|
150 |
-
position: fixed;
|
151 |
-
bottom: 20px;
|
152 |
-
right: 20px;
|
153 |
-
background: var(--success-color);
|
154 |
-
color: white;
|
155 |
-
padding: 10px 20px;
|
156 |
-
border-radius: 4px;
|
157 |
-
display: none;
|
158 |
-
}
|
159 |
-
|
160 |
-
.operation-status {
|
161 |
-
margin: 20px 0;
|
162 |
-
padding: 15px;
|
163 |
-
border: 1px solid var(--border-color);
|
164 |
-
border-radius: 4px;
|
165 |
-
}
|
166 |
-
|
167 |
-
.status-grid {
|
168 |
-
display: grid;
|
169 |
-
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
170 |
-
gap: 10px;
|
171 |
-
margin-top: 10px;
|
172 |
-
}
|
173 |
-
|
174 |
-
.status-item {
|
175 |
-
display: flex;
|
176 |
-
align-items: center;
|
177 |
-
gap: 8px;
|
178 |
-
padding: 8px;
|
179 |
-
border-radius: 4px;
|
180 |
-
background: var(--background-color);
|
181 |
}
|
182 |
|
183 |
-
.
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
align-items: center;
|
189 |
-
justify-content: center;
|
190 |
-
color: white;
|
191 |
-
font-size: 12px;
|
192 |
}
|
193 |
|
194 |
-
|
195 |
-
|
|
|
|
|
|
|
|
|
196 |
}
|
197 |
|
198 |
-
|
199 |
-
|
|
|
|
|
|
|
200 |
}
|
201 |
|
202 |
-
.
|
203 |
-
|
204 |
-
|
205 |
-
|
|
|
|
|
206 |
}
|
207 |
-
|
208 |
-
pre {
|
209 |
-
margin: 0;
|
210 |
-
border-radius: 4px;
|
211 |
-
}
|
212 |
-
|
213 |
-
code {
|
214 |
-
font-family: 'Monaco', 'Menlo', monospace;
|
215 |
-
font-size: 14px;
|
216 |
-
}
|
217 |
-
/* Add to the existing style section */
|
218 |
-
.extraction-container {
|
219 |
-
margin: 20px 0;
|
220 |
-
padding: 20px;
|
221 |
-
background: var(--background-color);
|
222 |
-
border-radius: 8px;
|
223 |
-
}
|
224 |
-
|
225 |
-
.extraction-container textarea {
|
226 |
-
height: 100px;
|
227 |
-
margin-bottom: 10px;
|
228 |
-
}
|
229 |
-
|
230 |
-
#executeExtraction:disabled {
|
231 |
-
background: var(--secondary-color);
|
232 |
-
cursor: not-allowed;
|
233 |
-
}
|
234 |
-
</style>
|
235 |
</head>
|
236 |
<body>
|
237 |
<div class="container">
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
</div>
|
255 |
-
<div class="option-item">
|
256 |
-
<input type="checkbox" id="removeScripts" name="removeScripts" checked>
|
257 |
-
<label for="removeScripts">Remove scripts</label>
|
258 |
-
</div>
|
259 |
-
<div class="option-item">
|
260 |
-
<input type="checkbox" id="removeStyles" name="removeStyles" checked>
|
261 |
-
<label for="removeStyles">Remove styles</label>
|
262 |
-
</div>
|
263 |
-
<div class="option-item">
|
264 |
-
<input type="checkbox" id="handleRepeatingElements" name="handleRepeatingElements" checked>
|
265 |
-
<label for="handleRepeatingElements">Handle repeating elements</label>
|
266 |
-
</div>
|
267 |
-
<div class="option-item">
|
268 |
-
<input type="checkbox" id="truncateText" name="truncateText" checked>
|
269 |
-
<label for="truncateText">Truncate text</label>
|
270 |
-
</div>
|
271 |
-
<div class="option-item">
|
272 |
-
<label for="truncateLength">Max text length:</label>
|
273 |
-
<input type="number" id="truncateLength" name="truncateLength" value="100" min="10" max="1000">
|
274 |
-
</div>
|
275 |
-
<div class="option-item">
|
276 |
-
<input type="checkbox" id="minifyHtml" name="minifyHtml" checked>
|
277 |
-
<label for="minifyHtml">Minify HTML</label>
|
278 |
-
</div>
|
279 |
-
<div class="option-item">
|
280 |
-
<input type="checkbox" id="removeMedia" name="removeMedia" checked>
|
281 |
-
<label for="removeMedia">Remove media</label>
|
282 |
-
</div>
|
283 |
-
</div>
|
284 |
-
</div>
|
285 |
-
|
286 |
-
<div class="extraction-container">
|
287 |
-
<h3>Data Extraction</h3>
|
288 |
-
<textarea
|
289 |
-
id="extractionQuery"
|
290 |
-
placeholder="Enter your extraction query (e.g., 'extract product title and price')"
|
291 |
-
></textarea>
|
292 |
-
<div class="button-group">
|
293 |
-
<button type="button" id="generateScript">Generate Extraction Script</button>
|
294 |
-
<button type="button" id="executeExtraction" disabled>Execute Extraction</button>
|
295 |
-
</div>
|
296 |
-
</div>
|
297 |
-
|
298 |
-
<div class="button-group">
|
299 |
-
<input type="file" accept=".html,.htm" id="fileInput">
|
300 |
-
<button type="submit">Process HTML</button>
|
301 |
-
</div>
|
302 |
-
</form>
|
303 |
-
|
304 |
-
<div id="operationStatus" class="operation-status" style="display: none;">
|
305 |
-
<h3>Operation Status</h3>
|
306 |
-
<div class="status-grid"></div>
|
307 |
-
</div>
|
308 |
-
|
309 |
-
<div id="stats" class="stats-grid" style="display: none;"></div>
|
310 |
-
|
311 |
-
<div class="results-container" style="display: none;">
|
312 |
-
<div class="results-tabs">
|
313 |
-
<div class="tab active" data-view="html">Compressed HTML</div>
|
314 |
-
<div class="tab" data-view="json">JSON Structure</div>
|
315 |
-
<div class="tab" data-view="extraction">Extraction Results</div>
|
316 |
-
</div>
|
317 |
-
|
318 |
-
<div class="result-panel" id="htmlView">
|
319 |
-
<div class="result-header">
|
320 |
-
<h3>HTML Output</h3>
|
321 |
<div class="button-group">
|
322 |
-
|
323 |
-
|
324 |
</div>
|
325 |
-
</div>
|
326 |
-
<div class="result-content">
|
327 |
-
<pre><code class="language-html" id="htmlOutput"></code></pre>
|
328 |
-
</div>
|
329 |
</div>
|
330 |
-
|
331 |
-
<div class="
|
332 |
-
|
333 |
-
<
|
334 |
-
<div class="button-group">
|
335 |
-
<button onclick="copyResult('json')">Copy</button>
|
336 |
-
<button onclick="downloadResult('json')">Download</button>
|
337 |
-
</div>
|
338 |
-
</div>
|
339 |
-
<div class="result-content">
|
340 |
-
<pre><code class="language-json" id="jsonOutput"></code></pre>
|
341 |
-
</div>
|
342 |
</div>
|
343 |
-
|
344 |
-
<div class="
|
345 |
-
|
346 |
-
<h3>Extraction Results</h3>
|
347 |
-
<div class="button-group">
|
348 |
-
<button onclick="copyResult('extraction')">Copy</button>
|
349 |
-
<button onclick="downloadResult('extraction')">Download</button>
|
350 |
-
</div>
|
351 |
-
</div>
|
352 |
-
<div class="result-content">
|
353 |
<pre><code class="language-json" id="extractionOutput"></code></pre>
|
354 |
-
</div>
|
355 |
</div>
|
356 |
-
</div>
|
357 |
-
|
358 |
-
<div class="copy-feedback">Copied to clipboard!</div>
|
359 |
</div>
|
360 |
-
|
361 |
-
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/prism.min.js"></script>
|
362 |
-
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-markup.min.js"></script>
|
363 |
-
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-json.min.js"></script>
|
364 |
<script>
|
365 |
-
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
|
372 |
-
|
373 |
-
|
374 |
-
|
375 |
-
|
376 |
-
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
|
384 |
-
|
385 |
-
|
386 |
-
|
387 |
-
|
388 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
389 |
});
|
390 |
-
|
391 |
-
|
392 |
-
|
393 |
-
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
'x-api-key': API_KEY,
|
418 |
-
'Content-Type': 'application/json',
|
419 |
-
},
|
420 |
-
body: JSON.stringify({
|
421 |
-
html: htmlContent,
|
422 |
-
user_input: userInput
|
423 |
-
})
|
424 |
-
});
|
425 |
-
|
426 |
-
const data = await response.json();
|
427 |
-
|
428 |
-
if (data.status === 'success') {
|
429 |
-
currentCheerioScript = data.cheerio_script;
|
430 |
-
document.getElementById('executeExtraction').disabled = false;
|
431 |
-
|
432 |
-
// Show the script in the JSON view
|
433 |
-
document.getElementById('jsonOutput').textContent = JSON.stringify({
|
434 |
-
cheerio_script: currentCheerioScript
|
435 |
-
}, null, 2);
|
436 |
-
Prism.highlightAll();
|
437 |
-
} else {
|
438 |
-
alert('Failed to generate extraction script');
|
439 |
-
}
|
440 |
-
} catch (error) {
|
441 |
-
alert('Error generating script: ' + error.message);
|
442 |
-
}
|
443 |
-
}
|
444 |
-
|
445 |
-
// Execute extraction
|
446 |
-
async function executeExtraction() {
|
447 |
-
if (!currentCheerioScript) {
|
448 |
-
alert('Please generate an extraction script first');
|
449 |
-
return;
|
450 |
-
}
|
451 |
-
|
452 |
-
const htmlContent = document.getElementById('htmlOutput').textContent;
|
453 |
-
|
454 |
-
try {
|
455 |
-
const response = await fetch('/extract', {
|
456 |
-
method: 'POST',
|
457 |
-
headers: {
|
458 |
-
'Content-Type': 'application/json'
|
459 |
-
},
|
460 |
-
body: JSON.stringify({
|
461 |
-
html: htmlContent,
|
462 |
-
script: currentCheerioScript
|
463 |
-
})
|
464 |
-
});
|
465 |
-
|
466 |
-
const data = await response.json();
|
467 |
-
|
468 |
-
document.getElementById('extractionOutput').textContent =
|
469 |
-
JSON.stringify(data, null, 2);
|
470 |
-
Prism.highlightAll();
|
471 |
-
|
472 |
-
// Switch to extraction view
|
473 |
-
document.querySelector('[data-view="extraction"]').click();
|
474 |
-
} catch (error) {
|
475 |
-
alert('Error executing extraction: ' + error.message);
|
476 |
-
}
|
477 |
-
}
|
478 |
-
|
479 |
-
// Form submission
|
480 |
-
form.addEventListener('submit', async (e) => {
|
481 |
-
e.preventDefault();
|
482 |
-
|
483 |
-
const formData = new FormData(form);
|
484 |
-
|
485 |
-
// Add checkbox states
|
486 |
-
document.querySelectorAll('input[type="checkbox"]').forEach(checkbox => {
|
487 |
-
formData.set(checkbox.name, checkbox.checked);
|
488 |
});
|
489 |
-
|
490 |
-
try {
|
491 |
-
const response = await fetch('/process', {
|
492 |
-
method: 'POST',
|
493 |
-
body: formData,
|
494 |
-
});
|
495 |
-
|
496 |
-
const data = await response.json();
|
497 |
-
|
498 |
-
if (data.error) {
|
499 |
-
alert(data.error);
|
500 |
-
return;
|
501 |
-
}
|
502 |
-
|
503 |
-
// Display operation status
|
504 |
-
const statusContainer = document.querySelector('#operationStatus');
|
505 |
-
const statusGrid = statusContainer.querySelector('.status-grid');
|
506 |
-
statusContainer.style.display = 'block';
|
507 |
-
|
508 |
-
statusGrid.innerHTML = Object.entries(data.operationStatus)
|
509 |
-
.map(([operation, status]) => `
|
510 |
-
<div class="status-item">
|
511 |
-
<div class="status-icon ${status.success ? 'status-success' : 'status-error'}">
|
512 |
-
${status.success ? '✓' : '✗'}
|
513 |
-
</div>
|
514 |
-
<div>
|
515 |
-
<div>${formatLabel(operation)}</div>
|
516 |
-
${status.error ? `<div class="status-message">Error: ${status.error}</div>` : ''}
|
517 |
-
</div>
|
518 |
-
</div>
|
519 |
-
`).join('');
|
520 |
-
|
521 |
-
// Display stats
|
522 |
-
statsContainer.style.display = 'grid';
|
523 |
-
statsContainer.innerHTML = Object.entries(data.stats)
|
524 |
-
.map(([key, value]) => `
|
525 |
-
<div class="stat-item">
|
526 |
-
<div class="stat-label">${formatLabel(key)}</div>
|
527 |
-
<div class="stat-value">${value}</div>
|
528 |
-
</div>
|
529 |
-
`).join('');
|
530 |
-
|
531 |
-
// Show results container
|
532 |
-
resultsContainer.style.display = 'block';
|
533 |
-
|
534 |
-
// Update outputs with syntax highlighting
|
535 |
-
document.getElementById('htmlOutput').textContent = data.result.html;
|
536 |
-
document.getElementById('jsonOutput').textContent = data.result.json;
|
537 |
-
|
538 |
-
// Trigger Prism highlighting
|
539 |
-
Prism.highlightAll();
|
540 |
-
} catch (err) {
|
541 |
-
alert('Error processing HTML: ' + err.message);
|
542 |
-
}
|
543 |
-
});
|
544 |
-
|
545 |
-
// Event listeners for extraction
|
546 |
-
document.getElementById('generateScript').addEventListener('click', generateCheerioScript);
|
547 |
-
document.getElementById('executeExtraction').addEventListener('click', executeExtraction);
|
548 |
-
|
549 |
-
// Utility functions
|
550 |
-
function formatLabel(key) {
|
551 |
-
return key
|
552 |
-
.replace(/([A-Z])/g, ' $1')
|
553 |
-
.replace(/([a-z])([A-Z])/g, '$1 $2')
|
554 |
-
.toLowerCase()
|
555 |
-
.replace(/^./, str => str.toUpperCase())
|
556 |
-
.replace('Html', 'HTML');
|
557 |
-
}
|
558 |
-
|
559 |
-
async function copyResult(type) {
|
560 |
-
const content = document.getElementById(`${type}Output`).textContent;
|
561 |
-
try {
|
562 |
-
await navigator.clipboard.writeText(content);
|
563 |
-
showCopyFeedback();
|
564 |
-
} catch (err) {
|
565 |
-
alert('Failed to copy to clipboard');
|
566 |
-
}
|
567 |
-
}
|
568 |
-
|
569 |
-
function downloadResult(type) {
|
570 |
-
const content = document.getElementById(`${type}Output`).textContent;
|
571 |
-
const blob = new Blob([content], { type: 'text/plain' });
|
572 |
-
const url = URL.createObjectURL(blob);
|
573 |
-
const a = document.createElement('a');
|
574 |
-
a.href = url;
|
575 |
-
a.download = `compressed.${type}`;
|
576 |
-
document.body.appendChild(a);
|
577 |
-
a.click();
|
578 |
-
document.body.removeChild(a);
|
579 |
-
URL.revokeObjectURL(url);
|
580 |
-
}
|
581 |
-
|
582 |
-
function showCopyFeedback() {
|
583 |
-
copyFeedback.style.display = 'block';
|
584 |
-
setTimeout(() => {
|
585 |
-
copyFeedback.style.display = 'none';
|
586 |
-
}, 2000);
|
587 |
-
}
|
588 |
</script>
|
589 |
-
|
590 |
</html>
|
|
|
49 |
border-radius: 8px;
|
50 |
margin: 20px 0;
|
51 |
}
|
52 |
+
.input-section {
|
53 |
+
margin-bottom: 30px;
|
54 |
+
padding: 20px;
|
55 |
+
border: 1px solid var(--border-color);
|
56 |
+
border-radius: 8px;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
}
|
58 |
|
59 |
+
.script-section {
|
60 |
+
margin: 20px 0;
|
61 |
+
padding: 20px;
|
62 |
+
background: var(--background-color);
|
63 |
+
border-radius: 8px;
|
|
|
|
|
|
|
|
|
64 |
}
|
65 |
|
66 |
+
#userInput {
|
67 |
+
width: 100%;
|
68 |
+
padding: 12px;
|
69 |
+
margin-bottom: 15px;
|
70 |
+
border: 1px solid var(--border-color);
|
71 |
+
border-radius: 4px;
|
72 |
}
|
73 |
|
74 |
+
#scriptOutput {
|
75 |
+
background: white;
|
76 |
+
padding: 15px;
|
77 |
+
border-radius: 4px;
|
78 |
+
margin-top: 15px;
|
79 |
}
|
80 |
|
81 |
+
.extraction-results {
|
82 |
+
margin-top: 20px;
|
83 |
+
padding: 15px;
|
84 |
+
background: white;
|
85 |
+
border-radius: 4px;
|
86 |
+
border: 1px solid var(--border-color);
|
87 |
}
|
88 |
+
</style>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
</head>
|
90 |
<body>
|
91 |
<div class="container">
|
92 |
+
<h1>HTML Data Extractor</h1>
|
93 |
+
|
94 |
+
<div class="input-section">
|
95 |
+
<h2>Input HTML</h2>
|
96 |
+
<textarea
|
97 |
+
id="htmlInput"
|
98 |
+
placeholder="Paste your HTML here..."
|
99 |
+
></textarea>
|
100 |
+
|
101 |
+
<h3>Extraction Query</h3>
|
102 |
+
<input
|
103 |
+
type="text"
|
104 |
+
id="userInput"
|
105 |
+
placeholder="Describe what data you want to extract..."
|
106 |
+
>
|
107 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
<div class="button-group">
|
109 |
+
<button id="generateScript">Generate Script</button>
|
110 |
+
<button id="extractData" disabled>Extract Data</button>
|
111 |
</div>
|
|
|
|
|
|
|
|
|
112 |
</div>
|
113 |
+
|
114 |
+
<div class="script-section" style="display: none;">
|
115 |
+
<h2>Generated Cheerio Script</h2>
|
116 |
+
<pre><code class="language-javascript" id="scriptOutput"></code></pre>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
</div>
|
118 |
+
|
119 |
+
<div class="extraction-results" style="display: none;">
|
120 |
+
<h2>Extracted Data</h2>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
<pre><code class="language-json" id="extractionOutput"></code></pre>
|
|
|
122 |
</div>
|
|
|
|
|
|
|
123 |
</div>
|
124 |
+
|
|
|
|
|
|
|
125 |
<script>
|
126 |
+
const generateScriptBtn = document.getElementById('generateScript');
|
127 |
+
const extractDataBtn = document.getElementById('extractData');
|
128 |
+
const htmlInput = document.getElementById('htmlInput');
|
129 |
+
const userInput = document.getElementById('userInput');
|
130 |
+
const scriptSection = document.querySelector('.script-section');
|
131 |
+
const extractionResults = document.querySelector('.extraction-results');
|
132 |
+
|
133 |
+
let currentScript = '';
|
134 |
+
|
135 |
+
generateScriptBtn.addEventListener('click', async () => {
|
136 |
+
try {
|
137 |
+
const response = await fetch('https://elevatics-ai-web-scraper-chat.hf.space/api/v1/generate-cheerio-script', {
|
138 |
+
method: 'POST',
|
139 |
+
headers: {
|
140 |
+
'accept': 'application/json',
|
141 |
+
'x-api-key': 'ae54a922-ed3a-4634-be4a-4e4dd470800a',
|
142 |
+
'Content-Type': 'application/json',
|
143 |
+
},
|
144 |
+
body: JSON.stringify({
|
145 |
+
html: htmlInput.value,
|
146 |
+
user_input: userInput.value
|
147 |
+
})
|
148 |
+
});
|
149 |
+
|
150 |
+
const data = await response.json();
|
151 |
+
|
152 |
+
if (data.status === 'success') {
|
153 |
+
currentScript = data.cheerio_script;
|
154 |
+
document.getElementById('scriptOutput').textContent = currentScript;
|
155 |
+
scriptSection.style.display = 'block';
|
156 |
+
extractDataBtn.disabled = false;
|
157 |
+
Prism.highlightAll();
|
158 |
+
}
|
159 |
+
} catch (error) {
|
160 |
+
alert('Error generating script: ' + error.message);
|
161 |
+
}
|
162 |
});
|
163 |
+
|
164 |
+
extractDataBtn.addEventListener('click', async () => {
|
165 |
+
try {
|
166 |
+
const response = await fetch('/extract', {
|
167 |
+
method: 'POST',
|
168 |
+
headers: {
|
169 |
+
'Content-Type': 'application/json'
|
170 |
+
},
|
171 |
+
body: JSON.stringify({
|
172 |
+
html: htmlInput.value,
|
173 |
+
script: currentScript
|
174 |
+
})
|
175 |
+
});
|
176 |
+
|
177 |
+
const data = await response.json();
|
178 |
+
|
179 |
+
if (data.success) {
|
180 |
+
document.getElementById('extractionOutput').textContent =
|
181 |
+
JSON.stringify(data.data, null, 2);
|
182 |
+
extractionResults.style.display = 'block';
|
183 |
+
Prism.highlightAll();
|
184 |
+
} else {
|
185 |
+
alert('Extraction failed: ' + data.error);
|
186 |
+
}
|
187 |
+
} catch (error) {
|
188 |
+
alert('Error during extraction: ' + error.message);
|
189 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
190 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
</script>
|
192 |
+
</body>
|
193 |
</html>
|