lik07 committed on
Commit
4c5eb78
·
verified ·
1 Parent(s): e639253

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -19
app.py CHANGED
@@ -3,6 +3,7 @@ from docx import Document
3
  import os
4
  import shutil
5
  import zipfile
 
6
 
7
  def split_by_headers(file_path, headers_per_chunk=1):
8
  doc = Document(file_path)
@@ -11,15 +12,12 @@ def split_by_headers(file_path, headers_per_chunk=1):
11
  header_count = 0
12
 
13
  for paragraph in doc.paragraphs:
14
- # Verificar si el párrafo es un encabezado
15
  if paragraph.style.name.startswith('Heading'):
16
  header_count += 1
17
  if header_count > headers_per_chunk:
18
  chunks.append(current_chunk)
19
  current_chunk = Document()
20
  header_count = 1
21
-
22
- # Copiar el párrafo al chunk actual
23
  current_chunk.add_paragraph(paragraph.text, style=paragraph.style.name)
24
 
25
  if len(current_chunk.paragraphs):
@@ -32,7 +30,7 @@ def split_by_pages(file_path, pages_per_chunk=1):
32
  chunks = []
33
  current_chunk = Document()
34
  page_count = 0
35
- estimated_chars_per_page = 3000 # Esta es una estimación
36
  char_count = 0
37
 
38
  for paragraph in doc.paragraphs:
@@ -42,12 +40,10 @@ def split_by_pages(file_path, pages_per_chunk=1):
42
  if char_count >= estimated_chars_per_page:
43
  page_count += 1
44
  char_count = 0
45
-
46
  if page_count >= pages_per_chunk:
47
  chunks.append(current_chunk)
48
  current_chunk = Document()
49
  page_count = 0
50
-
51
  current_chunk.add_paragraph(text, style=paragraph.style.name)
52
 
53
  if len(current_chunk.paragraphs):
@@ -60,42 +56,40 @@ def save_chunks(chunks, original_filename):
60
  base_name = os.path.splitext(original_filename)[0]
61
 
62
  for i, chunk in enumerate(chunks, 1):
63
- output_path = f"{base_name}_part{i}.docx"
64
- chunk.save(output_path)
65
- saved_files.append(output_path)
66
 
67
  return saved_files
68
 
69
- def zip_files(files, zip_name="document_parts.zip"):
70
- with zipfile.ZipFile(zip_name, 'w') as zipf:
 
71
  for file in files:
72
  zipf.write(file, os.path.basename(file))
73
- return zip_name
74
 
75
  def process_document(file, split_type, headers_or_pages, download_type):
76
  if headers_or_pages < 1:
77
  return "Por favor, especifique un número positivo de encabezados o páginas por fragmento."
78
 
79
  try:
80
- # Dividir el documento según el tipo seleccionado
81
  if split_type == "Encabezados":
82
  chunks = split_by_headers(file.name, headers_or_pages)
83
- else: # Páginas
84
  chunks = split_by_pages(file.name, headers_or_pages)
85
 
86
  saved_files = save_chunks(chunks, os.path.basename(file.name))
87
 
88
- # Si el usuario selecciona descargar en ZIP, comprimir los archivos
89
  if download_type == "ZIP":
90
  zip_path = zip_files(saved_files)
91
- return zip_path # Devolver el archivo zip para descargar
92
  else:
93
- return saved_files # Devolver los archivos separados para descargar
94
 
95
  except Exception as e:
96
  return f"Error al procesar el documento: {str(e)}"
97
 
98
- # Interfaz Gradio
99
  iface = gr.Interface(
100
  fn=process_document,
101
  inputs=[
@@ -110,4 +104,4 @@ iface = gr.Interface(
110
  )
111
 
112
  if __name__ == "__main__":
113
- iface.launch()
 
3
  import os
4
  import shutil
5
  import zipfile
6
+ import tempfile
7
 
8
  def split_by_headers(file_path, headers_per_chunk=1):
9
  doc = Document(file_path)
 
12
  header_count = 0
13
 
14
  for paragraph in doc.paragraphs:
 
15
  if paragraph.style.name.startswith('Heading'):
16
  header_count += 1
17
  if header_count > headers_per_chunk:
18
  chunks.append(current_chunk)
19
  current_chunk = Document()
20
  header_count = 1
 
 
21
  current_chunk.add_paragraph(paragraph.text, style=paragraph.style.name)
22
 
23
  if len(current_chunk.paragraphs):
 
30
  chunks = []
31
  current_chunk = Document()
32
  page_count = 0
33
+ estimated_chars_per_page = 3000
34
  char_count = 0
35
 
36
  for paragraph in doc.paragraphs:
 
40
  if char_count >= estimated_chars_per_page:
41
  page_count += 1
42
  char_count = 0
 
43
  if page_count >= pages_per_chunk:
44
  chunks.append(current_chunk)
45
  current_chunk = Document()
46
  page_count = 0
 
47
  current_chunk.add_paragraph(text, style=paragraph.style.name)
48
 
49
  if len(current_chunk.paragraphs):
 
56
  base_name = os.path.splitext(original_filename)[0]
57
 
58
  for i, chunk in enumerate(chunks, 1):
59
+ temp_path = os.path.join(tempfile.gettempdir(), f"{base_name}_part{i}.docx")
60
+ chunk.save(temp_path)
61
+ saved_files.append(temp_path)
62
 
63
  return saved_files
64
 
65
+ def zip_files(files):
66
+ zip_path = os.path.join(tempfile.gettempdir(), "document_parts.zip")
67
+ with zipfile.ZipFile(zip_path, 'w') as zipf:
68
  for file in files:
69
  zipf.write(file, os.path.basename(file))
70
+ return zip_path
71
 
72
  def process_document(file, split_type, headers_or_pages, download_type):
73
  if headers_or_pages < 1:
74
  return "Por favor, especifique un número positivo de encabezados o páginas por fragmento."
75
 
76
  try:
 
77
  if split_type == "Encabezados":
78
  chunks = split_by_headers(file.name, headers_or_pages)
79
+ else:
80
  chunks = split_by_pages(file.name, headers_or_pages)
81
 
82
  saved_files = save_chunks(chunks, os.path.basename(file.name))
83
 
 
84
  if download_type == "ZIP":
85
  zip_path = zip_files(saved_files)
86
+ return gr.File(zip_path) # Usar gr.File para devolver el archivo zip
87
  else:
88
+ return [gr.File(f) for f in saved_files] # Usar gr.File para devolver archivos separados
89
 
90
  except Exception as e:
91
  return f"Error al procesar el documento: {str(e)}"
92
 
 
93
  iface = gr.Interface(
94
  fn=process_document,
95
  inputs=[
 
104
  )
105
 
106
  if __name__ == "__main__":
107
+ iface.launch()