<!DOCTYPE html>
<!-- The doctype must be the very first token: any text before it puts the page in quirks mode. -->
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>HTML Compressor for LLM</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/themes/prism.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/plugins/toolbar/prism-toolbar.min.css">
<!--
  Prism runtime, same version as the stylesheets above. The page script calls
  into the global `Prism` object after each request, so the library itself has
  to be loaded, not only its themes. The core bundle covers JavaScript; JSON
  highlighting needs its own component. `defer` keeps parsing unblocked while
  preserving execution order.
-->
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/prism.min.js" defer></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/prism/1.24.1/components/prism-json.min.js" defer></script>
<style>
  /* Shared palette, referenced through var() by the rules below. */
  :root {
    --primary-color: #007bff;
    --secondary-color: #6c757d;
    --success-color: #28a745;
    --border-color: #dee2e6;
    --background-color: #f8f9fa;
  }

  body {
    font-family: system-ui, -apple-system, sans-serif;
    line-height: 1.6;
    margin: 0;
    padding: 20px;
    background: var(--background-color);
  }

  /* Centered white card that holds the whole tool. */
  .container {
    max-width: 1200px;
    margin: 0 auto;
    background: white;
    padding: 30px;
    border-radius: 8px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
  }

  textarea {
    /* border-box keeps padding and border inside the 100% width so the
       field does not spill past its parent. */
    box-sizing: border-box;
    width: 100%;
    height: 200px;
    padding: 12px;
    border: 1px solid var(--border-color);
    border-radius: 4px;
    font-family: 'Monaco', 'Menlo', monospace;
    font-size: 14px;
    resize: vertical;
    margin-bottom: 15px;
  }

  .options-container {
    background: var(--background-color);
    padding: 20px;
    border-radius: 8px;
    margin: 20px 0;
  }

  .input-section {
    margin-bottom: 30px;
    padding: 20px;
    border: 1px solid var(--border-color);
    border-radius: 8px;
  }

  .script-section {
    margin: 20px 0;
    padding: 20px;
    background: var(--background-color);
    border-radius: 8px;
  }

  #userInput {
    /* Same sizing model as the textarea, for the same overflow reason. */
    box-sizing: border-box;
    width: 100%;
    padding: 12px;
    margin-bottom: 15px;
    border: 1px solid var(--border-color);
    border-radius: 4px;
  }

  #scriptOutput {
    background: white;
    padding: 15px;
    border-radius: 4px;
    margin-top: 15px;
  }

  .extraction-results {
    margin-top: 20px;
    padding: 15px;
    background: white;
    border-radius: 4px;
    border: 1px solid var(--border-color);
  }
</style>
</head>

<body>
<!--
  Page UI. The element ids (htmlInput, userInput, generateScript, extractData,
  scriptOutput, extractionOutput) and the .script-section / .extraction-results
  classes are looked up by the page script, so they must stay as they are.
-->
<div class="container">
  <h1>HTML Data Extractor</h1>

  <div class="input-section">
    <!-- Each heading doubles as the visible label of the control below it;
         a placeholder alone is not an accessible name. -->
    <h2><label for="htmlInput">Input HTML</label></h2>
    <textarea id="htmlInput" placeholder="Paste your HTML here..."></textarea>

    <h3><label for="userInput">Extraction Query</label></h3>
    <input id="userInput" type="text" placeholder="Describe what data you want to extract...">

    <div class="button-group">
      <button id="generateScript" type="button">Generate Script</button>
      <!-- Starts disabled; the page script enables it once a script has been generated. -->
      <button id="extractData" type="button" disabled>Extract Data</button>
    </div>
  </div>

  <!-- Both result panels start hidden via inline display; the page script
       reveals them by setting style.display to "block". -->
  <div class="script-section" style="display: none;">
    <h2>Generated Cheerio Script</h2>
    <pre><code class="language-javascript" id="scriptOutput"></code></pre>
  </div>

  <div class="extraction-results" style="display: none;">
    <h2>Extracted Data</h2>
    <pre><code class="language-json" id="extractionOutput"></code></pre>
  </div>
</div>
<script> |
|
const generateScriptBtn = document.getElementById('generateScript'); |
|
const extractDataBtn = document.getElementById('extractData'); |
|
const htmlInput = document.getElementById('htmlInput'); |
|
const userInput = document.getElementById('userInput'); |
|
const scriptSection = document.querySelector('.script-section'); |
|
const extractionResults = document.querySelector('.extraction-results'); |
|
|
|
let currentScript = ''; |
|
|
|
generateScriptBtn.addEventListener('click', async () => { |
|
try { |
|
const response = await fetch('https://elevatics-ai-web-scraper-chat.hf.space/api/v1/generate-cheerio-script', { |
|
method: 'POST', |
|
headers: { |
|
'accept': 'application/json', |
|
'x-api-key': 'ae54a922-ed3a-4634-be4a-4e4dd470800a', |
|
'Content-Type': 'application/json', |
|
}, |
|
body: JSON.stringify({ |
|
html: htmlInput.value, |
|
user_input: userInput.value |
|
}) |
|
}); |
|
|
|
const data = await response.json(); |
|
|
|
if (data.status === 'success') { |
|
currentScript = data.cheerio_script; |
|
document.getElementById('scriptOutput').textContent = currentScript; |
|
scriptSection.style.display = 'block'; |
|
extractDataBtn.disabled = false; |
|
Prism.highlightAll(); |
|
} |
|
} catch (error) { |
|
alert('Error generating script: ' + error.message); |
|
} |
|
}); |
|
|
|
extractDataBtn.addEventListener('click', async () => { |
|
try { |
|
const response = await fetch('/extract', { |
|
method: 'POST', |
|
headers: { |
|
'Content-Type': 'application/json' |
|
}, |
|
body: JSON.stringify({ |
|
html: htmlInput.value, |
|
script: currentScript |
|
}) |
|
}); |
|
|
|
const data = await response.json(); |
|
|
|
if (data.success) { |
|
document.getElementById('extractionOutput').textContent = |
|
JSON.stringify(data.data, null, 2); |
|
extractionResults.style.display = 'block'; |
|
Prism.highlightAll(); |
|
} else { |
|
alert('Extraction failed: ' + data.error); |
|
} |
|
} catch (error) { |
|
alert('Error during extraction: ' + error.message); |
|
} |
|
}); |
|
</script> |
|
</body>

</html>