Spaces:

lik07
/

docx-spliter

Sleeping

App Files Community

lik07 committed on Oct 5, 2024

Commit

4c5eb78

verified ·

1 Parent(s): e639253

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -19

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ from docx import Document
 import os
 import shutil
 import zipfile
 def split_by_headers(file_path, headers_per_chunk=1):
     doc = Document(file_path)
@@ -11,15 +12,12 @@ def split_by_headers(file_path, headers_per_chunk=1):
     header_count = 0
     for paragraph in doc.paragraphs:
-        # Verificar si el párrafo es un encabezado
         if paragraph.style.name.startswith('Heading'):
             header_count += 1
             if header_count > headers_per_chunk:
                 chunks.append(current_chunk)
                 current_chunk = Document()
                 header_count = 1
-        # Copiar el párrafo al chunk actual
         current_chunk.add_paragraph(paragraph.text, style=paragraph.style.name)
     if len(current_chunk.paragraphs):
@@ -32,7 +30,7 @@ def split_by_pages(file_path, pages_per_chunk=1):
     chunks = []
     current_chunk = Document()
     page_count = 0
-    estimated_chars_per_page = 3000  # Esta es una estimación
     char_count = 0
     for paragraph in doc.paragraphs:
@@ -42,12 +40,10 @@ def split_by_pages(file_path, pages_per_chunk=1):
         if char_count >= estimated_chars_per_page:
             page_count += 1
             char_count = 0
             if page_count >= pages_per_chunk:
                 chunks.append(current_chunk)
                 current_chunk = Document()
                 page_count = 0
         current_chunk.add_paragraph(text, style=paragraph.style.name)
     if len(current_chunk.paragraphs):
@@ -60,42 +56,40 @@ def save_chunks(chunks, original_filename):
     base_name = os.path.splitext(original_filename)[0]
     for i, chunk in enumerate(chunks, 1):
-        output_path = f"{base_name}_part{i}.docx"
-        chunk.save(output_path)
-        saved_files.append(output_path)
     return saved_files
-def zip_files(files, zip_name="document_parts.zip"):
-    with zipfile.ZipFile(zip_name, 'w') as zipf:
         for file in files:
             zipf.write(file, os.path.basename(file))
-    return zip_name
 def process_document(file, split_type, headers_or_pages, download_type):
     if headers_or_pages < 1:
         return "Por favor, especifique un número positivo de encabezados o páginas por fragmento."
     try:
-        # Dividir el documento según el tipo seleccionado
         if split_type == "Encabezados":
             chunks = split_by_headers(file.name, headers_or_pages)
-        else:  # Páginas
             chunks = split_by_pages(file.name, headers_or_pages)
         saved_files = save_chunks(chunks, os.path.basename(file.name))
-        # Si el usuario selecciona descargar en ZIP, comprimir los archivos
         if download_type == "ZIP":
             zip_path = zip_files(saved_files)
-            return zip_path  # Devolver el archivo zip para descargar
         else:
-            return saved_files  # Devolver los archivos separados para descargar
     except Exception as e:
         return f"Error al procesar el documento: {str(e)}"
-# Interfaz Gradio
 iface = gr.Interface(
     fn=process_document,
     inputs=[
@@ -110,4 +104,4 @@ iface = gr.Interface(
 )
 if __name__ == "__main__":
-    iface.launch()

 import os
 import shutil
 import zipfile
+import tempfile
 def split_by_headers(file_path, headers_per_chunk=1):
     doc = Document(file_path)
     header_count = 0
     for paragraph in doc.paragraphs:
         if paragraph.style.name.startswith('Heading'):
             header_count += 1
             if header_count > headers_per_chunk:
                 chunks.append(current_chunk)
                 current_chunk = Document()
                 header_count = 1
         current_chunk.add_paragraph(paragraph.text, style=paragraph.style.name)
     if len(current_chunk.paragraphs):
     chunks = []
     current_chunk = Document()
     page_count = 0
+    estimated_chars_per_page = 3000
     char_count = 0
     for paragraph in doc.paragraphs:
         if char_count >= estimated_chars_per_page:
             page_count += 1
             char_count = 0
             if page_count >= pages_per_chunk:
                 chunks.append(current_chunk)
                 current_chunk = Document()
                 page_count = 0
         current_chunk.add_paragraph(text, style=paragraph.style.name)
     if len(current_chunk.paragraphs):
     base_name = os.path.splitext(original_filename)[0]
     for i, chunk in enumerate(chunks, 1):
+        temp_path = os.path.join(tempfile.gettempdir(), f"{base_name}_part{i}.docx")
+        chunk.save(temp_path)
+        saved_files.append(temp_path)
     return saved_files
+def zip_files(files):
+    zip_path = os.path.join(tempfile.gettempdir(), "document_parts.zip")
+    with zipfile.ZipFile(zip_path, 'w') as zipf:
         for file in files:
             zipf.write(file, os.path.basename(file))
+    return zip_path
 def process_document(file, split_type, headers_or_pages, download_type):
     if headers_or_pages < 1:
         return "Por favor, especifique un número positivo de encabezados o páginas por fragmento."
     try:
         if split_type == "Encabezados":
             chunks = split_by_headers(file.name, headers_or_pages)
+        else:
             chunks = split_by_pages(file.name, headers_or_pages)
         saved_files = save_chunks(chunks, os.path.basename(file.name))
         if download_type == "ZIP":
             zip_path = zip_files(saved_files)
+            return gr.File(zip_path)  # Usar gr.File para devolver el archivo zip
         else:
+            return [gr.File(f) for f in saved_files]  # Usar gr.File para devolver archivos separados
     except Exception as e:
         return f"Error al procesar el documento: {str(e)}"
 iface = gr.Interface(
     fn=process_document,
     inputs=[
 )
 if __name__ == "__main__":
+    iface.launch()