<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<!-- NOTE(review): the title says "Compressor" while the page heading reads
     "HTML Data Extractor" - confirm which name is intended. -->
<title>HTML Compressor for LLM</title>
<!-- Prism theme and toolbar styles used for the highlighted script/JSON output. -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/themes/prism.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/plugins/toolbar/prism-toolbar.min.css">
<!-- The page script calls Prism to highlight its output, so the library itself
     has to be loaded, not only its stylesheets. The core bundle covers
     JavaScript; JSON needs its own component. Both are deferred so they run in
     order after parsing. TODO: add integrity (SRI) hashes for these CDN assets. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/prism.min.js" defer></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-json.min.js" defer></script>
<style>
  /* Theme tokens. Only --border-color and --background-color are consumed by
     the rules in this stylesheet; the others are defined but unused here. */
  :root {
    --primary-color: #007bff;
    --secondary-color: #6c757d;
    --success-color: #28a745;
    --border-color: #dee2e6;
    --background-color: #f8f9fa;
  }

  body {
    font-family: system-ui, -apple-system, sans-serif;
    line-height: 1.6;
    margin: 0;
    padding: 20px;
    background: var(--background-color);
  }

  /* Centered white card that holds the whole tool. */
  .container {
    max-width: 1200px;
    margin: 0 auto;
    background: white;
    padding: 30px;
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
  }

  textarea {
    /* border-box keeps the padding and border inside the 100% width so the
       field does not overflow its parent. */
    box-sizing: border-box;
    width: 100%;
    height: 200px;
    padding: 12px;
    border: 1px solid var(--border-color);
    border-radius: 4px;
    font-family: 'Monaco', 'Menlo', monospace;
    font-size: 14px;
    resize: vertical;
    margin-bottom: 15px;
  }

  /* Not referenced by the markup visible in this file; kept in case other
     markup relies on it. */
  .options-container {
    background: var(--background-color);
    padding: 20px;
    border-radius: 8px;
    margin: 20px 0;
  }

  .input-section {
    margin-bottom: 30px;
    padding: 20px;
    border: 1px solid var(--border-color);
    border-radius: 8px;
  }

  .script-section {
    margin: 20px 0;
    padding: 20px;
    background: var(--background-color);
    border-radius: 8px;
  }

  #userInput {
    /* Same reasoning as for textarea: keep padding inside the 100% width. */
    box-sizing: border-box;
    width: 100%;
    padding: 12px;
    margin-bottom: 15px;
    border: 1px solid var(--border-color);
    border-radius: 4px;
  }

  #scriptOutput {
    background: white;
    padding: 15px;
    border-radius: 4px;
    margin-top: 15px;
  }

  .extraction-results {
    margin-top: 20px;
    padding: 15px;
    background: white;
    border-radius: 4px;
    border: 1px solid var(--border-color);
  }
</style>
</head>
<body>
<main class="container">
  <h1>HTML Data Extractor</h1>

  <!-- Inputs: the HTML to analyse and a free-text description of the data wanted. -->
  <div class="input-section">
    <!-- The headings double as the visible labels; the <label> elements tie
         them to their controls so the fields have accessible names beyond
         the placeholder text. -->
    <h2><label for="htmlInput">Input HTML</label></h2>
    <textarea id="htmlInput" placeholder="Paste your HTML here..."></textarea>

    <h3><label for="userInput">Extraction Query</label></h3>
    <input id="userInput" type="text" placeholder="Describe what data you want to extract...">

    <div class="button-group">
      <button id="generateScript" type="button">Generate Script</button>
      <!-- Starts disabled; the page script enables it once a script has been generated. -->
      <button id="extractData" type="button" disabled>Extract Data</button>
    </div>
  </div>

  <!-- Both result panels start hidden. The inline display value is kept
       because the page script reveals them by setting style.display. -->
  <div class="script-section" style="display: none;">
    <h2>Generated Cheerio Script</h2>
    <pre><code class="language-javascript" id="scriptOutput"></code></pre>
  </div>

  <div class="extraction-results" style="display: none;">
    <h2>Extracted Data</h2>
    <pre><code class="language-json" id="extractionOutput"></code></pre>
  </div>
</main>
<script>
// Page controller: "Generate Script" asks the remote API for a Cheerio script
// matching the user's query; "Extract Data" posts that script together with
// the HTML to the same-origin /extract endpoint and shows the JSON result.

const generateScriptBtn = document.getElementById('generateScript');
const extractDataBtn = document.getElementById('extractData');
const htmlInput = document.getElementById('htmlInput');
const userInput = document.getElementById('userInput');
const scriptSection = document.querySelector('.script-section');
const extractionResults = document.querySelector('.extraction-results');
const scriptOutputEl = document.getElementById('scriptOutput');
const extractionOutputEl = document.getElementById('extractionOutput');

// Most recently generated script; sent along with the HTML on extraction.
let currentScript = '';

const SCRIPT_API_URL = 'https://elevatics-ai-web-scraper-chat.hf.space/api/v1/generate-cheerio-script';
// SECURITY NOTE(review): this key is delivered to every browser that loads the
// page, so it is effectively public. It should be rotated and the call routed
// through a server-side proxy that holds the secret.
const SCRIPT_API_KEY = 'ae54a922-ed3a-4634-be4a-4e4dd470800a';

// Highlights one <code> element when Prism is present. The guard matters: if
// the library is not loaded, calling it would throw and a successful request
// would be reported to the user as an error.
function highlightIfAvailable(codeElement) {
  if (typeof Prism !== 'undefined') {
    Prism.highlightElement(codeElement);
  }
}

// POSTs `payload` as JSON to `url` and returns the parsed JSON body.
// The body is parsed regardless of HTTP status so that JSON error payloads
// still reach the caller; a body that is not JSON is reported with its status
// code instead of a raw parser message.
async function postJson(url, payload, extraHeaders) {
  const headers = Object.assign({ 'Content-Type': 'application/json' }, extraHeaders);
  const response = await fetch(url, {
    method: 'POST',
    headers: headers,
    body: JSON.stringify(payload)
  });
  try {
    return await response.json();
  } catch (parseError) {
    throw new Error('Unexpected response (HTTP ' + response.status + ')');
  }
}

// Best-effort failure text from a response body.
// NOTE(review): the field names tried here are guesses at the servers' error
// schema - confirm against the API and adjust.
function describeFailure(data) {
  const reason = data && (data.error || data.message || data.detail);
  return typeof reason === 'string' ? reason : JSON.stringify(data);
}

generateScriptBtn.addEventListener('click', async () => {
  if (!htmlInput.value.trim() || !userInput.value.trim()) {
    alert('Please provide both the HTML and an extraction query.');
    return;
  }

  // Block repeat clicks while the request is in flight.
  generateScriptBtn.disabled = true;
  try {
    const data = await postJson(
      SCRIPT_API_URL,
      { html: htmlInput.value, user_input: userInput.value },
      { 'accept': 'application/json', 'x-api-key': SCRIPT_API_KEY }
    );

    if (data && data.status === 'success') {
      currentScript = data.cheerio_script;
      scriptOutputEl.textContent = currentScript;
      scriptSection.style.display = 'block';
      extractDataBtn.disabled = false;
      highlightIfAvailable(scriptOutputEl);
    } else {
      // Previously a non-success reply was dropped without any feedback.
      alert('Script generation failed: ' + describeFailure(data));
    }
  } catch (error) {
    alert('Error generating script: ' + error.message);
  } finally {
    generateScriptBtn.disabled = false;
  }
});

extractDataBtn.addEventListener('click', async () => {
  if (!htmlInput.value.trim()) {
    alert('Please provide the HTML to extract from.');
    return;
  }

  // Block repeat clicks while the request is in flight.
  extractDataBtn.disabled = true;
  try {
    const data = await postJson('/extract', {
      html: htmlInput.value,
      script: currentScript
    });

    if (data && data.success) {
      extractionOutputEl.textContent = JSON.stringify(data.data, null, 2);
      extractionResults.style.display = 'block';
      highlightIfAvailable(extractionOutputEl);
    } else {
      alert('Extraction failed: ' + describeFailure(data));
    }
  } catch (error) {
    alert('Error during extraction: ' + error.message);
  } finally {
    // The button is only reachable after a script exists, so re-enable it.
    extractDataBtn.disabled = false;
  }
});
</script>
</body>
</html>