|
<!DOCTYPE html> |
|
<html lang="ar" dir="rtl"> |
|
<head> |
|
<meta charset="UTF-8"> |
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
<title>موندو لينجوا - نظام OCR</title> |
|
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.4.120/pdf.min.js"></script> |
|
<style> |
|
body { |
|
font-family: Arial, sans-serif; |
|
margin: 0; |
|
padding: 20px; |
|
background-color: #f8f9fa; |
|
} |
|
.header { |
|
background-color: #3b82f6; |
|
color: white; |
|
padding: 15px; |
|
border-radius: 10px; |
|
margin-bottom: 20px; |
|
text-align: center; |
|
} |
|
.container { |
|
max-width: 800px; |
|
margin: 0 auto; |
|
} |
|
.card { |
|
background-color: white; |
|
border-radius: 10px; |
|
box-shadow: 0 2px 5px rgba(0,0,0,0.1); |
|
padding: 20px; |
|
margin-bottom: 20px; |
|
} |
|
.pdf-grid { |
|
display: grid; |
|
grid-template-columns: repeat(auto-fill, minmax(120px, 1fr)); |
|
gap: 10px; |
|
margin-top: 15px; |
|
} |
|
.pdf-page { |
|
border: 1px solid #ddd; |
|
border-radius: 5px; |
|
padding: 5px; |
|
position: relative; |
|
cursor: pointer; |
|
transition: all 0.2s; |
|
} |
|
.pdf-page:hover { |
|
transform: translateY(-3px); |
|
box-shadow: 0 3px 10px rgba(0,0,0,0.1); |
|
} |
|
.pdf-page.selected { |
|
border: 2px solid #3b82f6; |
|
} |
|
.pdf-page img { |
|
width: 100%; |
|
height: auto; |
|
border-radius: 3px; |
|
} |
|
.page-number { |
|
position: absolute; |
|
bottom: 0; |
|
left: 0; |
|
right: 0; |
|
background-color: rgba(0,0,0,0.6); |
|
color: white; |
|
text-align: center; |
|
font-size: 12px; |
|
padding: 2px; |
|
} |
|
.btn-primary { |
|
background-color: #3b82f6; |
|
border-color: #3b82f6; |
|
} |
|
.btn-success { |
|
background-color: #10b981; |
|
border-color: #10b981; |
|
} |
|
.result-text { |
|
max-height: 300px; |
|
overflow-y: auto; |
|
white-space: pre-wrap; |
|
direction: rtl; |
|
border: 1px solid #ddd; |
|
padding: 10px; |
|
border-radius: 5px; |
|
background-color: #f8f9fa; |
|
} |
|
.spinner-border { |
|
width: 1.5rem; |
|
height: 1.5rem; |
|
margin-left: 0.5rem; |
|
} |
|
.logo { |
|
font-weight: bold; |
|
font-size: 24px; |
|
display: inline-block; |
|
margin-bottom: 5px; |
|
} |
|
.logo span { |
|
color: #bfdbfe; |
|
} |
|
</style> |
|
</head> |
|
<body> |
|
<div class="container"> |
|
<div class="header"> |
|
<div class="logo">مـوندو <span>لينجـوا</span></div> |
|
<h1>نظام التعرف الضوئي على النصوص</h1> |
|
<p class="mb-0">استخراج النصوص من الصور والملفات متعددة الصفحات</p> |
|
</div> |
|
|
|
|
|
<div class="card"> |
|
<h3>تحميل الملف</h3> |
|
<div class="mb-3"> |
|
<input type="file" class="form-control" id="fileInput" accept=".pdf,.jpg,.jpeg,.png"> |
|
<div class="form-text">يمكنك تحميل ملف PDF (حتى 100 صفحة) أو صورة</div> |
|
</div> |
|
<button id="processBtn" class="btn btn-primary">معالجة الملف</button> |
|
</div> |
|
|
|
|
|
<div id="processingStatus" class="card d-none"> |
|
<h3>حالة المعالجة</h3> |
|
<div class="alert alert-info"> |
|
<div class="d-flex align-items-center"> |
|
<div class="spinner-border text-primary" role="status"></div> |
|
<span id="statusText" class="ms-2">جاري معالجة الملف...</span> |
|
</div> |
|
</div> |
|
<div class="progress mt-2"> |
|
<div id="progressBar" class="progress-bar" role="progressbar" style="width: 0%"></div> |
|
</div> |
|
</div> |
|
|
|
|
|
<div id="pdfPagesCard" class="card d-none"> |
|
<h3>صفحات الملف</h3> |
|
<p>اختر الصفحات التي تريد معالجتها (انقر للتحديد)</p> |
|
|
|
<div class="mb-3"> |
|
<button id="selectAllBtn" class="btn btn-sm btn-outline-primary me-2">تحديد الكل</button> |
|
<button id="deselectAllBtn" class="btn btn-sm btn-outline-secondary">إلغاء تحديد الكل</button> |
|
</div> |
|
|
|
<div id="pdfPagesContainer" class="pdf-grid"></div> |
|
|
|
<div class="mt-3"> |
|
<button id="extractImagesBtn" class="btn btn-success me-2">تحويل إلى صور</button> |
|
<button id="extractTextBtn" class="btn btn-primary">استخراج النص</button> |
|
</div> |
|
</div> |
|
|
|
|
|
<div id="resultsCard" class="card d-none"> |
|
<h3>النص المستخرج</h3> |
|
<div id="resultText" class="result-text mt-3"> |
|
لم يتم استخراج نص بعد. |
|
</div> |
|
<div class="mt-3"> |
|
<button id="copyTextBtn" class="btn btn-outline-primary me-2">نسخ النص</button> |
|
<button id="downloadTextBtn" class="btn btn-success">تنزيل النص</button> |
|
</div> |
|
</div> |
|
</div> |
|
|
|
<script> |
|
|
|
pdfjsLib.GlobalWorkerOptions.workerSrc = 'https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.4.120/pdf.worker.min.js'; |
|
|
|
|
|
const RAPIDAPI_KEY = 'eb11693cddmshb8bd157e05b74acp1f6aa4jsn4369fa546e55'; |
|
const OCR_API_URL = 'https://ocr43.p.rapidapi.com/v1/results'; |
|
|
|
|
|
let documentPages = []; |
|
let selectedPages = []; |
|
let extractedTexts = []; |
|
|
|
|
|
const fileInput = document.getElementById('fileInput'); |
|
const processBtn = document.getElementById('processBtn'); |
|
const processingStatus = document.getElementById('processingStatus'); |
|
const statusText = document.getElementById('statusText'); |
|
const progressBar = document.getElementById('progressBar'); |
|
const pdfPagesCard = document.getElementById('pdfPagesCard'); |
|
const pdfPagesContainer = document.getElementById('pdfPagesContainer'); |
|
const selectAllBtn = document.getElementById('selectAllBtn'); |
|
const deselectAllBtn = document.getElementById('deselectAllBtn'); |
|
const extractImagesBtn = document.getElementById('extractImagesBtn'); |
|
const extractTextBtn = document.getElementById('extractTextBtn'); |
|
const resultsCard = document.getElementById('resultsCard'); |
|
const resultText = document.getElementById('resultText'); |
|
const copyTextBtn = document.getElementById('copyTextBtn'); |
|
const downloadTextBtn = document.getElementById('downloadTextBtn'); |
|
|
|
|
|
document.addEventListener('DOMContentLoaded', function() { |
|
processBtn.addEventListener('click', processFile); |
|
selectAllBtn.addEventListener('click', selectAllPages); |
|
deselectAllBtn.addEventListener('click', deselectAllPages); |
|
extractImagesBtn.addEventListener('click', extractImages); |
|
extractTextBtn.addEventListener('click', extractText); |
|
copyTextBtn.addEventListener('click', copyText); |
|
downloadTextBtn.addEventListener('click', downloadText); |
|
}); |
|
|
|
|
|
async function processFile() { |
|
if (!fileInput.files || fileInput.files.length === 0) { |
|
alert('الرجاء اختيار ملف أولاً'); |
|
return; |
|
} |
|
|
|
const file = fileInput.files[0]; |
|
|
|
|
|
processingStatus.classList.remove('d-none'); |
|
statusText.textContent = 'جاري معالجة الملف...'; |
|
progressBar.style.width = '0%'; |
|
|
|
|
|
pdfPagesCard.classList.add('d-none'); |
|
resultsCard.classList.add('d-none'); |
|
|
|
|
|
documentPages = []; |
|
selectedPages = []; |
|
extractedTexts = []; |
|
|
|
try { |
|
const fileType = file.name.split('.').pop().toLowerCase(); |
|
|
|
if (fileType === 'pdf') { |
|
await processPdf(file); |
|
} else if (['jpg', 'jpeg', 'png'].includes(fileType)) { |
|
await processImage(file); |
|
} else { |
|
throw new Error('نوع الملف غير مدعوم. يرجى اختيار ملف PDF أو صورة.'); |
|
} |
|
|
|
|
|
pdfPagesCard.classList.remove('d-none'); |
|
|
|
|
|
processingStatus.classList.add('d-none'); |
|
|
|
} catch (error) { |
|
console.error('Error processing file:', error); |
|
statusText.textContent = `خطأ: ${error.message}`; |
|
|
|
} |
|
} |
|
|
|
|
|
async function processPdf(file) { |
|
try { |
|
|
|
const arrayBuffer = await file.arrayBuffer(); |
|
const pdfDoc = await pdfjsLib.getDocument({ data: arrayBuffer }).promise; |
|
|
|
|
|
const numPages = pdfDoc.numPages; |
|
if (numPages > 100) { |
|
alert('هذا الملف يحتوي على أكثر من 30 صفحة. سيتم معالجة أول 30 صفحة فقط.'); |
|
} |
|
|
|
|
|
pdfPagesContainer.innerHTML = ''; |
|
|
|
|
|
const maxPages = Math.min(numPages, 100); |
|
for (let i = 1; i <= maxPages; i++) { |
|
|
|
progressBar.style.width = `${(i / maxPages) * 100}%`; |
|
statusText.textContent = `جاري معالجة الصفحة ${i} من ${maxPages}...`; |
|
|
|
|
|
const pageImage = await convertPdfPageToImage(pdfDoc, i); |
|
|
|
|
|
documentPages.push({ |
|
pageNumber: i, |
|
imageData: pageImage.imageData, |
|
width: pageImage.width, |
|
height: pageImage.height |
|
}); |
|
|
|
|
|
createPageElement(pageImage.imageData, i); |
|
} |
|
|
|
} catch (error) { |
|
console.error('Error processing PDF:', error); |
|
throw error; |
|
} |
|
} |
|
|
|
|
|
async function processImage(file) { |
|
try { |
|
|
|
const imageData = await readFileAsDataURL(file); |
|
|
|
|
|
const img = new Image(); |
|
await new Promise((resolve, reject) => { |
|
img.onload = resolve; |
|
img.onerror = reject; |
|
img.src = imageData; |
|
}); |
|
|
|
|
|
pdfPagesContainer.innerHTML = ''; |
|
|
|
|
|
documentPages = [{ |
|
pageNumber: 1, |
|
imageData: imageData, |
|
width: img.width, |
|
height: img.height |
|
}]; |
|
|
|
|
|
createPageElement(imageData, 1); |
|
|
|
|
|
progressBar.style.width = '100%'; |
|
statusText.textContent = 'تمت معالجة الصورة بنجاح'; |
|
|
|
} catch (error) { |
|
console.error('Error processing image:', error); |
|
throw error; |
|
} |
|
} |
|
|
|
|
|
async function convertPdfPageToImage(pdfDoc, pageNumber, scale = 1.5) { |
|
try { |
|
|
|
const page = await pdfDoc.getPage(pageNumber); |
|
|
|
|
|
const canvas = document.createElement('canvas'); |
|
const context = canvas.getContext('2d'); |
|
|
|
|
|
const viewport = page.getViewport({ scale }); |
|
canvas.width = viewport.width; |
|
canvas.height = viewport.height; |
|
|
|
|
|
await page.render({ |
|
canvasContext: context, |
|
viewport: viewport |
|
}).promise; |
|
|
|
|
|
return { |
|
imageData: canvas.toDataURL('image/png'), |
|
width: viewport.width, |
|
height: viewport.height, |
|
pageNumber: pageNumber |
|
}; |
|
} catch (error) { |
|
console.error(`Error converting PDF page ${pageNumber} to image:`, error); |
|
throw error; |
|
} |
|
} |
|
|
|
|
|
function createPageElement(imageData, pageNumber) { |
|
const pageDiv = document.createElement('div'); |
|
pageDiv.className = 'pdf-page'; |
|
pageDiv.dataset.page = pageNumber; |
|
|
|
const img = document.createElement('img'); |
|
img.src = imageData; |
|
img.alt = `صفحة ${pageNumber}`; |
|
|
|
const pageNumberDiv = document.createElement('div'); |
|
pageNumberDiv.className = 'page-number'; |
|
pageNumberDiv.textContent = `صفحة ${pageNumber}`; |
|
|
|
pageDiv.appendChild(img); |
|
pageDiv.appendChild(pageNumberDiv); |
|
|
|
|
|
pageDiv.addEventListener('click', function() { |
|
this.classList.toggle('selected'); |
|
|
|
|
|
const page = parseInt(this.dataset.page); |
|
if (this.classList.contains('selected')) { |
|
if (!selectedPages.includes(page)) { |
|
selectedPages.push(page); |
|
} |
|
} else { |
|
const index = selectedPages.indexOf(page); |
|
if (index > -1) { |
|
selectedPages.splice(index, 1); |
|
} |
|
} |
|
}); |
|
|
|
pdfPagesContainer.appendChild(pageDiv); |
|
} |
|
|
|
|
|
function readFileAsDataURL(file) { |
|
return new Promise((resolve, reject) => { |
|
const reader = new FileReader(); |
|
reader.onload = e => resolve(e.target.result); |
|
reader.onerror = reject; |
|
reader.readAsDataURL(file); |
|
}); |
|
} |
|
|
|
|
|
function selectAllPages() { |
|
document.querySelectorAll('.pdf-page').forEach(page => { |
|
page.classList.add('selected'); |
|
const pageNumber = parseInt(page.dataset.page); |
|
if (!selectedPages.includes(pageNumber)) { |
|
selectedPages.push(pageNumber); |
|
} |
|
}); |
|
} |
|
|
|
|
|
function deselectAllPages() { |
|
document.querySelectorAll('.pdf-page').forEach(page => { |
|
page.classList.remove('selected'); |
|
}); |
|
selectedPages = []; |
|
} |
|
|
|
|
|
function extractImages() { |
|
if (documentPages.length === 0) { |
|
alert('لا توجد صفحات للمعالجة'); |
|
return; |
|
} |
|
|
|
|
|
const pagesToExtract = selectedPages.length > 0 |
|
? documentPages.filter(page => selectedPages.includes(page.pageNumber)) |
|
: documentPages; |
|
|
|
if (pagesToExtract.length === 0) { |
|
alert('الرجاء تحديد صفحة واحدة على الأقل'); |
|
return; |
|
} |
|
|
|
|
|
pagesToExtract.forEach(page => { |
|
const a = document.createElement('a'); |
|
a.href = page.imageData; |
|
a.download = `mondo_lingua_page_${page.pageNumber}.png`; |
|
document.body.appendChild(a); |
|
a.click(); |
|
document.body.removeChild(a); |
|
}); |
|
|
|
|
|
alert(`تم استخراج ${pagesToExtract.length} صورة بنجاح`); |
|
} |
|
|
|
|
|
async function extractText() { |
|
if (documentPages.length === 0) { |
|
alert('لا توجد صفحات للمعالجة'); |
|
return; |
|
} |
|
|
|
|
|
const pagesToProcess = selectedPages.length > 0 |
|
? documentPages.filter(page => selectedPages.includes(page.pageNumber)) |
|
: documentPages; |
|
|
|
if (pagesToProcess.length === 0) { |
|
alert('الرجاء تحديد صفحة واحدة على الأقل'); |
|
return; |
|
} |
|
|
|
|
|
processingStatus.classList.remove('d-none'); |
|
statusText.textContent = 'جاري استخراج النص...'; |
|
progressBar.style.width = '0%'; |
|
|
|
try { |
|
extractedTexts = []; |
|
|
|
|
|
for (let i = 0; i < pagesToProcess.length; i++) { |
|
const page = pagesToProcess[i]; |
|
|
|
|
|
progressBar.style.width = `${((i + 1) / pagesToProcess.length) * 100}%`; |
|
statusText.textContent = `جاري معالجة الصفحة ${i + 1} من ${pagesToProcess.length}...`; |
|
|
|
|
|
const pageText = await extractTextFromImage(page.imageData, page.pageNumber); |
|
extractedTexts.push(pageText); |
|
} |
|
|
|
|
|
const combinedText = extractedTexts.join('\n\n'); |
|
|
|
|
|
resultText.textContent = combinedText; |
|
resultsCard.classList.remove('d-none'); |
|
|
|
|
|
processingStatus.classList.add('d-none'); |
|
|
|
|
|
resultsCard.scrollIntoView({ behavior: 'smooth' }); |
|
|
|
} catch (error) { |
|
console.error('Error extracting text:', error); |
|
statusText.textContent = `خطأ: ${error.message}`; |
|
} |
|
} |
|
|
|
|
|
async function extractTextFromImage(imageData, pageNumber) { |
|
try { |
|
|
|
const response = await fetch(imageData); |
|
const blob = await response.blob(); |
|
|
|
|
|
const formData = new FormData(); |
|
formData.append('image', blob, `page_${pageNumber}.png`); |
|
|
|
|
|
const ocrResponse = await fetch(OCR_API_URL, { |
|
method: 'POST', |
|
headers: { |
|
'X-RapidAPI-Key': RAPIDAPI_KEY, |
|
'X-RapidAPI-Host': 'ocr43.p.rapidapi.com' |
|
}, |
|
body: formData |
|
}); |
|
|
|
if (!ocrResponse.ok) { |
|
throw new Error(`فشل في طلب OCR: ${ocrResponse.status}`); |
|
} |
|
|
|
const data = await ocrResponse.json(); |
|
|
|
try { |
|
|
|
const text = data.results[0].entities[0].objects[0].entities[0].text; |
|
return `=== صفحة ${pageNumber} ===\n${text}`; |
|
} catch (e) { |
|
console.error('Error parsing OCR response:', e); |
|
return `=== صفحة ${pageNumber} ===\n[خطأ في معالجة النص]`; |
|
} |
|
} catch (error) { |
|
console.error(`Error in OCR for page ${pageNumber}:`, error); |
|
throw error; |
|
} |
|
} |
|
|
|
|
|
function copyText() { |
|
const text = resultText.textContent; |
|
if (!text || text === 'لم يتم استخراج نص بعد.') { |
|
alert('لا يوجد نص للنسخ'); |
|
return; |
|
} |
|
|
|
navigator.clipboard.writeText(text) |
|
.then(() => alert('تم نسخ النص بنجاح')) |
|
.catch(err => alert('حدث خطأ أثناء نسخ النص: ' + err)); |
|
} |
|
|
|
|
|
function downloadText() { |
|
const text = resultText.textContent; |
|
if (!text || text === 'لم يتم استخراج نص بعد.') { |
|
alert('لا يوجد نص للتنزيل'); |
|
return; |
|
} |
|
|
|
const blob = new Blob([text], { type: 'text/plain;charset=utf-8' }); |
|
const url = URL.createObjectURL(blob); |
|
const a = document.createElement('a'); |
|
a.href = url; |
|
a.download = 'mondo_lingua_ocr_text.txt'; |
|
document.body.appendChild(a); |
|
a.click(); |
|
document.body.removeChild(a); |
|
URL.revokeObjectURL(url); |
|
} |
|
</script> |
|
</body> |
|
</html> |