Spaces:

Toowired
/

tts-reader

Running

App Files Files Community

Toowired committed on May 13

Commit

c1a840b

verified ·

1 Parent(s): 4ba1281

Update index.html

Browse files

Files changed (1) hide show

index.html +348 -21

index.html CHANGED Viewed

@@ -1,4 +1,5 @@
-<!DOCTYPE html>
 <html lang="en">
 <head>
     <meta charset="UTF-8">
@@ -293,13 +294,14 @@
                     <button id="uploadBtn" class="bg-blue-500 hover:bg-blue-600 text-white py-3 px-4 rounded-lg flex items-center justify-center transition">
                         <i class="fas fa-upload mr-2"></i> Upload File
                     </button>
-                    <button id="pasteBtn" class="bg-green-500 hover:green-600 text-white py-3 px-4 rounded-lg flex items-center justify-center transition">
                         <i class="fas fa-paste mr-2"></i> Paste Text
                     </button>
                     <div id="dropzone" class="dropzone bg-gray-50 py-3 px-4 rounded-lg flex items-center justify-center cursor-pointer">
                         <div class="text-center">
                             <i class="fas fa-file-import text-2xl text-gray-400 mb-1"></i>
                             <p class="text-gray-500">Drag & Drop File</p>
                         </div>
                     </div>
                 </div>
@@ -2631,51 +2633,159 @@
                 }
             });
-            // File processing
             function handleFile(file) {
                 const fileType = file.type;
                 const fileName = file.name.toLowerCase();
-                if (fileType === 'text/plain' || fileName.endsWith('.txt')) {
-                    readTextFile(file);
-                } else if (fileType === 'application/pdf' || fileName.endsWith('.pdf')) {
-                    readPDFFile(file);
-                } else if (fileType === 'application/msword' ||
-                           fileType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' ||
-                           fileName.endsWith('.doc') || fileName.endsWith('.docx')) {
-                    showToast('Word document support requires conversion to text. Please save as plain text or PDF first.', 'error');
-                } else {
-                    showToast('Unsupported file type. Please upload a TXT or PDF file.', 'error');
                 }
             }
             function readTextFile(file) {
                 const reader = new FileReader();
                 reader.onload = (e) => {
-                    setDocumentContent(e.target.result);
                 };
-                reader.readAsText(file);
             }
             function readPDFFile(file) {
-                // Check if PDF.js is available
                 if (typeof pdfjsLib === 'undefined') {
-                    showToast('PDF.js not loaded. PDF support requires PDF.js library.', 'error');
                     return;
                 }
-                // Show loading state
-                documentContent.innerHTML = '<div class="text-center py-8"><div class="loading-spinner mx-auto"></div><p class="text-gray-500 mt-2">Processing PDF file...</p></div>';
-                // Load PDF.js
-                pdfjsLib.getDocument(URL.createObjectURL(file)).promise.then(pdf => {
                     let text = '';
                     const numPages = pdf.numPages;
                     const pagePromises = [];
                     for (let i = 1; i <= numPages; i++) {
                         pagePromises.push(pdf.getPage(i).then(page => {
                             return page.getTextContent().then(textContent => {
                                 return textContent.items.map(item => item.str).join(' ');
                             });
                         }));
@@ -2684,16 +2794,233 @@
                     Promise.all(pagePromises).then(pagesText => {
                         text = pagesText.join('\n\n');
                         setDocumentContent(text);
                     }).catch(error => {
                         console.error('Error extracting PDF text:', error);
                         showToast('Failed to extract text from PDF', 'error');
                     });
                 }).catch(error => {
                     console.error('Error loading PDF:', error);
                     showToast('Failed to load PDF file', 'error');
                 });
             }
             // Set document content
             function setDocumentContent(text) {
                 currentText = text;

+<!DOCTYPE html>
+<!-- TODO: this accept list was meant for the existing #fileInput element, not the top of the file: accept=".txt,.pdf,.doc,.docx,.odt,.rtf,.md,.markdown,.json,.html,.htm,.xml,.csv,.epub" -->
 <html lang="en">
 <head>
     <meta charset="UTF-8">
                     <button id="uploadBtn" class="bg-blue-500 hover:bg-blue-600 text-white py-3 px-4 rounded-lg flex items-center justify-center transition">
                         <i class="fas fa-upload mr-2"></i> Upload File
                     </button>
+                    <button id="pasteBtn" class="bg-green-500 hover:bg-green-600 text-white py-3 px-4 rounded-lg flex items-center justify-center transition">
                         <i class="fas fa-paste mr-2"></i> Paste Text
                     </button>
                     <div id="dropzone" class="dropzone bg-gray-50 py-3 px-4 rounded-lg flex items-center justify-center cursor-pointer">
                         <div class="text-center">
                             <i class="fas fa-file-import text-2xl text-gray-400 mb-1"></i>
                             <p class="text-gray-500">Drag & Drop File</p>
+                            <p class="text-xs text-gray-400 mt-1">Supports: TXT, PDF, DOC/DOCX, RTF, MD, JSON, HTML, XML, CSV</p>
                         </div>
                     </div>
                 </div>
                 }
             });
+            // Enhanced file processing with size validation and progress
             function handleFile(file) {
+                // Validate file size (50MB limit)
+                const maxSize = 50 * 1024 * 1024; // 50MB
+                if (file.size > maxSize) {
+                    showToast('File too large. Please upload a file smaller than 50MB.', 'error');
+                    return;
+                }
                 const fileType = file.type;
                 const fileName = file.name.toLowerCase();
+                const fileExtension = fileName.split('.').pop();
+                // Show processing progress
+                showFileProgress(fileName, 'processing');
+                documentContent.innerHTML = `
+                    <div class="text-center py-8">
+                        <div class="loading-spinner mx-auto"></div>
+                        <p class="text-gray-500 mt-2">Processing ${fileName}...</p>
+                        <p class="text-xs text-gray-400 mt-1">File size: ${formatFileSize(file.size)}</p>
+                    </div>
+                `;
+                try {
+                    switch (fileExtension) {
+                        case 'txt':
+                            readTextFile(file);
+                            break;
+                        case 'pdf':
+                            readPDFFile(file);
+                            break;
+                        case 'doc':
+                        case 'docx':
+                            readWordDocument(file);
+                            break;
+                        case 'rtf':
+                            readRTFFile(file);
+                            break;
+                        case 'md':
+                        case 'markdown':
+                            readMarkdownFile(file);
+                            break;
+                        case 'json':
+                            readJSONFile(file);
+                            break;
+                        case 'html':
+                        case 'htm':
+                            readHTMLFile(file);
+                            break;
+                        case 'xml':
+                            readXMLFile(file);
+                            break;
+                        case 'csv':
+                            readCSVFile(file);
+                            break;
+                        case 'odt':
+                            throw new Error('ODT files require conversion. Please save as DOC/DOCX or PDF.');
+                        case 'epub':
+                            throw new Error('EPUB files are not yet supported. Please extract text manually.');
+                        default:
+                            // Try to read as plain text if unknown extension
+                            if (fileType.startsWith('text/')) {
+                                readTextFile(file);
+                            } else {
+                                throw new Error(`Unsupported file type: .${fileExtension}. Please upload a supported document format.`);
+                            }
+                    }
+                } catch (error) {
+                    console.error('Error processing file:', error);
+                    showToast('Error processing file: ' + error.message, 'error');
+                    documentContent.innerHTML = '<p class="text-gray-500 italic">Error loading document. Please try again.</p>';
+                    showFileProgress(fileName, 'error');
                 }
             }
+            // File progress indicator
+            function showFileProgress(fileName, status) {
+                const progressEl = document.getElementById('fileProgress');
+                if (!progressEl) {
+                    const progress = document.createElement('div');
+                    progress.id = 'fileProgress';
+                    progress.className = 'fixed bottom-4 right-4 bg-white border border-gray-200 rounded-lg shadow-lg p-3 z-50';
+                    document.body.appendChild(progress);
+                }
+                const progressEl2 = document.getElementById('fileProgress');
+                const statusIcons = {
+                    processing: '<i class="fas fa-spinner fa-spin text-blue-500 mr-2"></i>',
+                    success: '<i class="fas fa-check-circle text-green-500 mr-2"></i>',
+                    error: '<i class="fas fa-times-circle text-red-500 mr-2"></i>'
+                };
+                progressEl2.innerHTML = `
+                    ${statusIcons[status]}
+                    <span class="text-sm">${fileName}</span>
+                `;
+                if (status !== 'processing') {
+                    setTimeout(() => {
+                        progressEl2.remove();
+                    }, 3000);
+                }
+            }
+            // Enhanced text file reader with encoding detection
             function readTextFile(file) {
                 const reader = new FileReader();
                 reader.onload = (e) => {
+                    const text = e.target.result;
+                    setDocumentContent(text);
+                    showFileProgress(file.name, 'success');
+                    showToast(`Loaded ${formatFileSize(file.size)} text file`, 'success');
                 };
+                reader.onerror = () => {
+                    showToast('Error reading text file', 'error');
+                    showFileProgress(file.name, 'error');
+                };
+                reader.readAsText(file, 'UTF-8');
             }
+            // Enhanced PDF reader with progress
             function readPDFFile(file) {
                 if (typeof pdfjsLib === 'undefined') {
+                    showToast('PDF.js not loaded. Refreshing page...', 'error');
+                    setTimeout(() => window.location.reload(), 2000);
                     return;
                 }
+                const fileURL = URL.createObjectURL(file);
+                pdfjsLib.getDocument(fileURL).promise.then(pdf => {
                     let text = '';
                     const numPages = pdf.numPages;
                     const pagePromises = [];
+                    // Update progress as pages are processed
+                    let pagesProcessed = 0;
                     for (let i = 1; i <= numPages; i++) {
                         pagePromises.push(pdf.getPage(i).then(page => {
                             return page.getTextContent().then(textContent => {
+                                pagesProcessed++;
+                                const progress = (pagesProcessed / numPages) * 100;
+                                documentContent.innerHTML = `
+                                    <div class="text-center py-8">
+                                        <div class="loading-spinner mx-auto"></div>
+                                        <p class="text-gray-500 mt-2">Processing PDF...</p>
+                                        <div class="w-32 bg-gray-200 rounded-full h-2 mx-auto mt-3">
+                                            <div class="bg-blue-600 h-2 rounded-full transition-all duration-300" style="width: ${progress}%"></div>
+                                        </div>
+                                        <p class="text-xs text-gray-400 mt-2">${pagesProcessed}/${numPages} pages</p>
+                                    </div>
+                                `;
                                 return textContent.items.map(item => item.str).join(' ');
                             });
                         }));
                     Promise.all(pagePromises).then(pagesText => {
                         text = pagesText.join('\n\n');
                         setDocumentContent(text);
+                        URL.revokeObjectURL(fileURL);
+                        showFileProgress(file.name, 'success');
+                        showToast(`Loaded PDF with ${numPages} pages`, 'success');
                     }).catch(error => {
                         console.error('Error extracting PDF text:', error);
                         showToast('Failed to extract text from PDF', 'error');
+                        URL.revokeObjectURL(fileURL);
+                        showFileProgress(file.name, 'error');
                     });
                 }).catch(error => {
                     console.error('Error loading PDF:', error);
                     showToast('Failed to load PDF file', 'error');
+                    URL.revokeObjectURL(fileURL);
+                    showFileProgress(file.name, 'error');
                 });
             }
+            // Word document reader (.doc/.docx)
+            function readWordDocument(file) {
+                const reader = new FileReader();
+                reader.onload = async (e) => {
+                    try {
+                        // Use mammoth to convert docx to text
+                        const result = await mammoth.extractRawText({arrayBuffer: e.target.result});
+                        setDocumentContent(result.value);
+                        if (result.messages.length > 0) {
+                            console.warn('Word processing warnings:', result.messages);
+                        }
+                    } catch (error) {
+                        console.error('Error reading Word document:', error);
+                        showToast('Error reading Word document. Please try saving as a different format.', 'error');
+                    }
+                };
+                reader.onerror = () => {
+                    showToast('Error reading Word document', 'error');
+                };
+                reader.readAsArrayBuffer(file);
+            }
+            // RTF file reader
+            function readRTFFile(file) {
+                const reader = new FileReader();
+                reader.onload = (e) => {
+                    let rtfText = e.target.result;
+                    // Basic RTF to plain text conversion
+                    // Remove RTF control codes
+                    rtfText = rtfText.replace(/\\[a-z]+[0-9]*\s?/gi, '');
+                    rtfText = rtfText.replace(/[{}]/g, '');
+                    rtfText = rtfText.replace(/\\\\/g, '\\');
+                    rtfText = rtfText.replace(/\\'/g, "'");
+                    rtfText = rtfText.replace(/\\\n/g, '\n');
+                    rtfText = rtfText.replace(/\s+/g, ' ');
+                    rtfText = rtfText.trim();
+                    setDocumentContent(rtfText);
+                };
+                reader.onerror = () => {
+                    showToast('Error reading RTF file', 'error');
+                };
+                reader.readAsText(file);
+            }
+            // Markdown file reader
+            function readMarkdownFile(file) {
+                const reader = new FileReader();
+                reader.onload = (e) => {
+                    let text = e.target.result;
+                    // Convert basic Markdown to plain text
+                    // Remove Markdown syntax for better TTS
+                    text = text.replace(/^#{1,6}\s+/gm, ''); // Headers
+                    text = text.replace(/\*\*(.*?)\*\*/g, '$1'); // Bold
+                    text = text.replace(/\*(.*?)\*/g, '$1'); // Italic
+                    text = text.replace(/_(.*?)_/g, '$1'); // Italic underscore
+                    text = text.replace(/`(.*?)`/g, '$1'); // Inline code
+                    text = text.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1'); // Links
+                    text = text.replace(/!\[([^\]]*)\]\([^)]+\)/g, ''); // Images
+                    text = text.replace(/^>\s+/gm, ''); // Blockquotes
+                    text = text.replace(/^[\s]*-\s+/gm, '• '); // Unordered lists
+                    text = text.replace(/^[\s]*\d+\.\s+/gm, ''); // Ordered lists
+                    text = text.replace(/---+/g, ''); // Horizontal rules
+                    text = text.replace(/\n{3,}/g, '\n\n'); // Multiple newlines
+                    setDocumentContent(text);
+                };
+                reader.onerror = () => {
+                    showToast('Error reading Markdown file', 'error');
+                };
+                reader.readAsText(file);
+            }
+            // JSON file reader
+            function readJSONFile(file) {
+                const reader = new FileReader();
+                reader.onload = (e) => {
+                    try {
+                        const jsonData = JSON.parse(e.target.result);
+                        // Convert JSON to readable text
+                        let text = '';
+                        function jsonToText(obj, prefix = '') {
+                            for (const [key, value] of Object.entries(obj)) {
+                                if (typeof value === 'object' && value !== null) {
+                                    text += `${prefix}${key}:\n`;
+                                    jsonToText(value, prefix + '  ');
+                                } else {
+                                    text += `${prefix}${key}: ${value}\n`;
+                                }
+                            }
+                        }
+                        jsonToText(jsonData);
+                        setDocumentContent(text);
+                    } catch (error) {
+                        showToast('Invalid JSON file', 'error');
+                    }
+                };
+                reader.onerror = () => {
+                    showToast('Error reading JSON file', 'error');
+                };
+                reader.readAsText(file);
+            }
+            // HTML file reader
+            function readHTMLFile(file) {
+                const reader = new FileReader();
+                reader.onload = (e) => {
+                    const htmlContent = e.target.result;
+                    // Create a temporary DOM element to extract text
+                    const tempDiv = document.createElement('div');
+                    tempDiv.innerHTML = htmlContent;
+                    // Remove script and style tags
+                    const scripts = tempDiv.querySelectorAll('script, style');
+                    scripts.forEach(script => script.remove());
+                    // Extract text content
+                    let text = tempDiv.textContent || tempDiv.innerText || '';
+                    // Clean up whitespace
+                    text = text.replace(/\s+/g, ' ');
+                    text = text.replace(/\n\s*\n/g, '\n\n');
+                    text = text.trim();
+                    setDocumentContent(text);
+                };
+                reader.onerror = () => {
+                    showToast('Error reading HTML file', 'error');
+                };
+                reader.readAsText(file);
+            }
+            // XML file reader
+            function readXMLFile(file) {
+                const reader = new FileReader();
+                reader.onload = (e) => {
+                    try {
+                        const xmlContent = e.target.result;
+                        const parser = new DOMParser();
+                        const xmlDoc = parser.parseFromString(xmlContent, 'text/xml');
+                        // Extract text content from XML
+                        let text = '';
+                        function extractText(node) {
+                            if (node.nodeType === Node.TEXT_NODE) {
+                                text += node.textContent + ' ';
+                            } else if (node.nodeType === Node.ELEMENT_NODE) {
+                                for (const child of node.childNodes) {
+                                    extractText(child);
+                                }
+                            }
+                        }
+                        extractText(xmlDoc.documentElement);
+                        // Clean up text
+                        text = text.replace(/\s+/g, ' ');
+                        text = text.trim();
+                        setDocumentContent(text);
+                    } catch (error) {
+                        showToast('Error parsing XML file', 'error');
+                    }
+                };
+                reader.onerror = () => {
+                    showToast('Error reading XML file', 'error');
+                };
+                reader.readAsText(file);
+            }
+            // CSV file reader
+            function readCSVFile(file) {
+                const reader = new FileReader();
+                reader.onload = (e) => {
+                    const csvContent = e.target.result;
+                    // Parse CSV (simple implementation)
+                    const lines = csvContent.split('\n').filter(line => line.trim());
+                    const headers = lines[0].split(',').map(h => h.trim().replace(/"/g, ''));
+                    let text = `CSV Data Summary:\n\n`;
+                    text += `Headers: ${headers.join(', ')}\n\n`;
+                    text += `Total rows: ${lines.length - 1}\n\n`;
+                    // Add first few rows as example
+                    if (lines.length > 1) {
+                        text += 'First few rows:\n';
+                        for (let i = 1; i <= Math.min(5, lines.length - 1); i++) {
+                            const values = lines[i].split(',').map(v => v.trim().replace(/"/g, ''));
+                            text += `Row ${i}: ${values.join(', ')}\n`;
+                        }
+                    }
+                    setDocumentContent(text);
+                    showToast('CSV file loaded. Showing data summary for TTS.', 'success');
+                };
+                reader.onerror = () => {
+                    showToast('Error reading CSV file', 'error');
+                };
+                reader.readAsText(file);
+            }
             // Set document content
             function setDocumentContent(text) {
                 currentText = text;