pvanand's picture
Update public/index.html
7225716 verified
raw
history blame
12.7 kB
<body>
<div class="container">
<h1>HTML Compressor for LLM</h1>
<p>Compress HTML content for optimal LLM processing while preserving essential structure.</p>
<!-- Main form: submitted via fetch() to /process by the inline script's submit handler. -->
<form id="compressorForm">
  <!-- Source HTML to compress; also filled from the file picker at the bottom of the form. -->
  <textarea
    name="html"
    id="htmlInput"
    placeholder="Paste your HTML here or upload a file..."
    aria-label="HTML to compress"
  ></textarea>
  <div class="options-container">
    <h3>Compression Options</h3>
    <!-- Each checkbox state is sent explicitly (true/false) by the submit handler. -->
    <div class="option-grid">
      <div class="option-item">
        <input type="checkbox" id="cleanHead" name="cleanHead" checked>
        <label for="cleanHead">Clean head section</label>
      </div>
      <div class="option-item">
        <input type="checkbox" id="removeScripts" name="removeScripts" checked>
        <label for="removeScripts">Remove scripts</label>
      </div>
      <div class="option-item">
        <input type="checkbox" id="removeStyles" name="removeStyles" checked>
        <label for="removeStyles">Remove styles</label>
      </div>
      <div class="option-item">
        <input type="checkbox" id="handleRepeatingElements" name="handleRepeatingElements" checked>
        <label for="handleRepeatingElements">Handle repeating elements</label>
      </div>
      <div class="option-item">
        <input type="checkbox" id="truncateText" name="truncateText" checked>
        <label for="truncateText">Truncate text</label>
      </div>
      <div class="option-item">
        <label for="truncateLength">Max text length:</label>
        <input type="number" id="truncateLength" name="truncateLength" value="100" min="10" max="1000">
      </div>
      <div class="option-item">
        <input type="checkbox" id="minifyHtml" name="minifyHtml" checked>
        <label for="minifyHtml">Minify HTML</label>
      </div>
      <div class="option-item">
        <input type="checkbox" id="removeMedia" name="removeMedia" checked>
        <label for="removeMedia">Remove media</label>
      </div>
    </div>
  </div>
  <div class="extraction-container">
    <h3>Data Extraction</h3>
    <!-- No name attribute: the query is read by script and is not part of the /process submission. -->
    <textarea
      id="extractionQuery"
      placeholder="Enter your extraction query (e.g., 'extract product title and price')"
      aria-label="Extraction query"
    ></textarea>
    <div class="button-group">
      <button type="button" id="generateScript">Generate Extraction Script</button>
      <!-- Enabled by script once an extraction script has been generated. -->
      <button type="button" id="executeExtraction" disabled>Execute Extraction</button>
    </div>
  </div>
  <div class="button-group">
    <!-- No name attribute: the file is read client-side into #htmlInput rather than uploaded. -->
    <input type="file" accept=".html,.htm" id="fileInput" aria-label="Upload an HTML file">
    <button type="submit">Process HTML</button>
  </div>
</form>
<!-- Per-operation results: made visible and filled (into .status-grid) by the form submit handler. -->
<div id="operationStatus" class="operation-status" style="display: none;">
<h3>Operation Status</h3>
<div class="status-grid"></div>
</div>
<!-- Statistics from the /process response: switched to display:grid and filled by the form submit handler. -->
<div id="stats" class="stats-grid" style="display: none;"></div>
<!-- Results area: hidden until the first successful /process response. -->
<div class="results-container" style="display: none;">
  <!--
    Tabs: the inline script shows the panel whose id is "<data-view>View".
    NOTE(review): these are click-only <div>s and cannot be reached or activated
    from the keyboard; converting them to <button type="button"> needs a matching
    CSS check, which is outside this view.
  -->
  <div class="results-tabs">
    <div class="tab active" data-view="html">Compressed HTML</div>
    <div class="tab" data-view="json">JSON Structure</div>
    <div class="tab" data-view="extraction">Extraction Results</div>
  </div>
  <!-- Compressed HTML panel (visible by default). -->
  <div class="result-panel" id="htmlView">
    <div class="result-header">
      <h3>HTML Output</h3>
      <div class="button-group">
        <button type="button" onclick="copyResult('html')">Copy</button>
        <button type="button" onclick="downloadResult('html')">Download</button>
      </div>
    </div>
    <div class="result-content">
      <pre><code class="language-html" id="htmlOutput"></code></pre>
    </div>
  </div>
  <!-- JSON panel: holds the JSON structure, or the generated Cheerio script once one is produced. -->
  <div class="result-panel" id="jsonView" style="display: none;">
    <div class="result-header">
      <h3>JSON Structure</h3>
      <div class="button-group">
        <button type="button" onclick="copyResult('json')">Copy</button>
        <button type="button" onclick="downloadResult('json')">Download</button>
      </div>
    </div>
    <div class="result-content">
      <pre><code class="language-json" id="jsonOutput"></code></pre>
    </div>
  </div>
  <!-- Extraction panel: holds the pretty-printed JSON reply from /extract. -->
  <div class="result-panel" id="extractionView" style="display: none;">
    <div class="result-header">
      <h3>Extraction Results</h3>
      <div class="button-group">
        <button type="button" onclick="copyResult('extraction')">Copy</button>
        <button type="button" onclick="downloadResult('extraction')">Download</button>
      </div>
    </div>
    <div class="result-content">
      <pre><code class="language-json" id="extractionOutput"></code></pre>
    </div>
  </div>
</div>
<!-- Confirmation message: showCopyFeedback() sets it to display:block, then back to none after 2 s. Presumably hidden initially by CSS not visible here. -->
<div class="copy-feedback">Copied to clipboard!</div>
</div>
<!-- Prism 1.24.1 from cdnjs: core plus the markup and JSON components, used through Prism.highlightAll() on the language-html / language-json code elements. -->
<!-- NOTE(review): third-party scripts are loaded without integrity/crossorigin attributes; consider adding SRI hashes. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/prism.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-markup.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-json.min.js"></script>
<script>
// Remote endpoint that generateCheerioScript() POSTs the compressed HTML and the user's query to.
const API_URL = 'https://elevatics-ai-web-scraper-chat.hf.space/api/v1/generate-cheerio-script';
// Sent as the x-api-key header on that request.
// NOTE(review): this value is delivered to every visitor in the page source. If it is a real
// credential it should presumably be moved behind a server-side proxy and rotated — confirm.
const API_KEY = 'ae54a922-ed3a-4634-be4a-4e4dd470800a';
// Most recently generated extraction script; null until generateCheerioScript() succeeds.
let currentCheerioScript = null;
// Frequently used DOM nodes, looked up once when the script runs.
const form = document.getElementById('compressorForm');
const fileInput = document.getElementById('fileInput');
const htmlInput = document.getElementById('htmlInput');
const resultsContainer = document.querySelector('.results-container');
const statsContainer = document.getElementById('stats');
const copyFeedback = document.querySelector('.copy-feedback');
// Tab switching: clicking a tab marks it active and shows only the matching result panel.
document.querySelectorAll('.tab').forEach((clickedTab) => {
  clickedTab.addEventListener('click', () => {
    // Move the "active" marker to the clicked tab.
    for (const anyTab of document.querySelectorAll('.tab')) {
      anyTab.classList.remove('active');
    }
    clickedTab.classList.add('active');

    // Panels are named "<view>View"; show the selected one and hide the rest.
    const selectedView = clickedTab.dataset.view;
    ['html', 'json', 'extraction'].forEach((viewName) => {
      const panel = document.getElementById(`${viewName}View`);
      panel.style.display = viewName === selectedView ? 'block' : 'none';
    });
  });
});
// File input handler: read the chosen file as text and place it in the HTML textarea.
fileInput.addEventListener('change', (changeEvent) => {
  const selectedFile = changeEvent.target.files[0];
  if (!selectedFile) {
    return; // nothing was chosen
  }
  const fileReader = new FileReader();
  fileReader.onload = (loadEvent) => {
    htmlInput.value = loadEvent.target.result;
  };
  fileReader.readAsText(selectedFile);
});
// Cheerio script generation
/**
 * Ask the remote API for a Cheerio script that extracts what the user described
 * from the compressed HTML currently shown in #htmlOutput.
 *
 * On success the script is stored in `currentCheerioScript`, the
 * "Execute Extraction" button is enabled and the script is displayed in
 * #jsonOutput. Every failure is reported to the user through alert().
 *
 * @returns {Promise<void>}
 */
async function generateCheerioScript() {
  const htmlContent = document.getElementById('htmlOutput').textContent;
  // Trim so a query made only of whitespace is treated as missing.
  const userInput = document.getElementById('extractionQuery').value.trim();
  if (!htmlContent || !userInput) {
    alert('Please process HTML and enter an extraction query first');
    return;
  }
  try {
    const response = await fetch(API_URL, {
      method: 'POST',
      headers: {
        'accept': 'application/json',
        'x-api-key': API_KEY,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        html: htmlContent,
        user_input: userInput
      })
    });
    // An error reply may not be JSON at all; report the HTTP status rather than
    // letting response.json() fail with an unrelated parse error.
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const data = await response.json();
    if (data.status === 'success') {
      currentCheerioScript = data.cheerio_script;
      document.getElementById('executeExtraction').disabled = false;
      // Show the script in the JSON view
      document.getElementById('jsonOutput').textContent = JSON.stringify({
        cheerio_script: currentCheerioScript
      }, null, 2);
      Prism.highlightAll();
    } else {
      alert('Failed to generate extraction script');
    }
  } catch (error) {
    alert('Error generating script: ' + error.message);
  }
}
// Execute extraction
/**
 * POST the compressed HTML and the stored Cheerio script to /extract, show the
 * pretty-printed JSON reply in #extractionOutput and switch to that tab.
 * Alerts if no script has been generated yet or if the request fails.
 *
 * @returns {Promise<void>}
 */
async function executeExtraction() {
  if (!currentCheerioScript) {
    alert('Please generate an extraction script first');
    return;
  }
  const compressedHtml = document.getElementById('htmlOutput').textContent;
  try {
    const requestOptions = {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ html: compressedHtml, script: currentCheerioScript })
    };
    const extractResponse = await fetch('/extract', requestOptions);
    const extractionResult = await extractResponse.json();

    const outputElement = document.getElementById('extractionOutput');
    outputElement.textContent = JSON.stringify(extractionResult, null, 2);
    Prism.highlightAll();

    // Reuse the tab's own click handler to bring the extraction panel forward.
    const extractionTab = document.querySelector('[data-view="extraction"]');
    extractionTab.click();
  } catch (error) {
    alert('Error executing extraction: ' + error.message);
  }
}
// Form submission
// Sends the form to /process, then renders the per-operation status, the stats
// and the compressed outputs. Strings taken from the response are inserted with
// textContent instead of innerHTML, so any markup they contain is displayed as
// text and never parsed or executed in this page.
form.addEventListener('submit', async (e) => {
  e.preventDefault();

  // Create <tag class="className"> whose content is `text` as plain text.
  const makeElement = (tag, className, text) => {
    const el = document.createElement(tag);
    if (className) {
      el.className = className;
    }
    if (text !== undefined) {
      el.textContent = String(text);
    }
    return el;
  };

  const formData = new FormData(form);
  // Add checkbox states: every checkbox is sent explicitly as true/false.
  document.querySelectorAll('input[type="checkbox"]').forEach(checkbox => {
    formData.set(checkbox.name, checkbox.checked);
  });
  try {
    const response = await fetch('/process', {
      method: 'POST',
      body: formData,
    });
    const data = await response.json();
    if (data.error) {
      alert(data.error);
      return;
    }

    // Display operation status
    const statusContainer = document.querySelector('#operationStatus');
    const statusGrid = statusContainer.querySelector('.status-grid');
    statusContainer.style.display = 'block';
    statusGrid.textContent = ''; // drop the rows from any previous run
    // A missing operationStatus renders as an empty grid instead of aborting.
    Object.entries(data.operationStatus || {}).forEach(([operation, status]) => {
      const succeeded = Boolean(status && status.success);
      const errorText = status ? status.error : undefined;

      const item = makeElement('div', 'status-item');
      item.appendChild(makeElement(
        'div',
        `status-icon ${succeeded ? 'status-success' : 'status-error'}`,
        succeeded ? '✓' : '✗'
      ));
      const details = makeElement('div');
      details.appendChild(makeElement('div', '', formatLabel(operation)));
      if (errorText) {
        details.appendChild(makeElement('div', 'status-message', `Error: ${errorText}`));
      }
      item.appendChild(details);
      statusGrid.appendChild(item);
    });

    // Display stats
    statsContainer.style.display = 'grid';
    statsContainer.textContent = '';
    Object.entries(data.stats || {}).forEach(([key, value]) => {
      const item = makeElement('div', 'stat-item');
      item.appendChild(makeElement('div', 'stat-label', formatLabel(key)));
      item.appendChild(makeElement('div', 'stat-value', value));
      statsContainer.appendChild(item);
    });

    // Show results container
    resultsContainer.style.display = 'block';
    // Update outputs with syntax highlighting
    document.getElementById('htmlOutput').textContent = data.result.html;
    document.getElementById('jsonOutput').textContent = data.result.json;
    // Trigger Prism highlighting
    Prism.highlightAll();
  } catch (err) {
    alert('Error processing HTML: ' + err.message);
  }
});
// Event listeners for extraction: wire each extraction button to its click handler.
for (const [buttonId, clickHandler] of [
  ['generateScript', generateCheerioScript],
  ['executeExtraction', executeExtraction],
]) {
  document.getElementById(buttonId).addEventListener('click', clickHandler);
}
// Utility functions
/**
 * Turn a camelCase key into a sentence-case display label.
 *
 * A space is inserted before every capital letter, the text is lower-cased,
 * the first character is capitalised and the word "html" is shown as "HTML"
 * wherever it occurs (e.g. "minifyHtml" -> "Minify HTML").
 *
 * @param {string} key - camelCase (or PascalCase) identifier.
 * @returns {string} Human-readable label; "" for an empty key.
 */
function formatLabel(key) {
  return key
    .replace(/([A-Z])/g, ' $1')
    // A key that starts with a capital would otherwise begin with a space,
    // which also stops the first letter from being capitalised below.
    .trim()
    .toLowerCase()
    .replace(/^./, (firstChar) => firstChar.toUpperCase())
    // Case-insensitive and global: after lower-casing, a case-sensitive
    // 'Html' match could only ever hit the very first word.
    .replace(/\bhtml\b/gi, 'HTML');
}
/**
 * Copy the text of the "<type>Output" element to the clipboard.
 * Shows the copy confirmation on success and an alert on failure.
 *
 * @param {string} type - Output prefix: 'html', 'json' or 'extraction'.
 * @returns {Promise<void>}
 */
async function copyResult(type) {
  const outputElement = document.getElementById(`${type}Output`);
  const textToCopy = outputElement.textContent;
  try {
    await navigator.clipboard.writeText(textToCopy);
    showCopyFeedback();
  } catch (copyError) {
    alert('Failed to copy to clipboard');
  }
}
/**
 * Download the text of the "<type>Output" element as a file.
 *
 * 'html' and 'json' are saved as compressed.html / compressed.json. The
 * extraction output is pretty-printed JSON, so it is saved as extraction.json
 * rather than with a meaningless ".extraction" extension.
 *
 * @param {string} type - Output prefix: 'html', 'json' or 'extraction'.
 */
function downloadResult(type) {
  const content = document.getElementById(`${type}Output`).textContent;
  const fileName = type === 'extraction' ? 'extraction.json' : `compressed.${type}`;
  const blob = new Blob([content], { type: 'text/plain' });
  const url = URL.createObjectURL(blob);
  // A temporary, programmatically clicked link is what triggers the download.
  const a = document.createElement('a');
  a.href = url;
  a.download = fileName;
  document.body.appendChild(a);
  a.click();
  document.body.removeChild(a);
  URL.revokeObjectURL(url);
}
/**
 * Show the "Copied to clipboard!" message for two seconds.
 *
 * The pending hide timer is kept on the function object and cancelled on each
 * call, so copying again restarts the two seconds instead of letting the
 * earlier timer hide the message early.
 */
function showCopyFeedback() {
  clearTimeout(showCopyFeedback.hideTimer); // no-op on the first call
  copyFeedback.style.display = 'block';
  showCopyFeedback.hideTimer = setTimeout(() => {
    copyFeedback.style.display = 'none';
  }, 2000);
}
</script>
</body>