Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -3,25 +3,29 @@ from modules.extractive import TFIDFSummarizer, TextRankSummarizer, CombinedSumm
|
|
3 |
from modules.abstractive import load_summarizers, abstractive_summary
|
4 |
from modules.preprocessing import Preprocessor, PDFProcessor
|
5 |
from modules.utils import handle_long_text
|
6 |
-
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
|
7 |
|
8 |
# Cargar modelos abstractivos finetuneados
|
9 |
summarizers = load_summarizers()
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
# Función principal para generar resúmenes
|
12 |
def summarize(input_text, file, summary_type, method, num_sentences, model_name, max_length, num_beams):
|
13 |
preprocessor = Preprocessor()
|
14 |
|
15 |
-
# Procesar archivo si se carga
|
16 |
if file is not None:
|
17 |
pdf_processor = PDFProcessor()
|
18 |
input_text = pdf_processor.pdf_to_text(file.name)
|
19 |
|
20 |
-
# Validar entrada de texto
|
21 |
if not input_text:
|
22 |
return "Por favor, ingrese texto o cargue un archivo válido."
|
23 |
|
24 |
-
# Limpiar texto de entrada
|
25 |
cleaned_text = preprocessor.clean_text(input_text)
|
26 |
|
27 |
if summary_type == "Extractivo":
|
@@ -36,7 +40,6 @@ def summarize(input_text, file, summary_type, method, num_sentences, model_name,
|
|
36 |
else:
|
37 |
return "Método no válido para resumen extractivo."
|
38 |
|
39 |
-
# Generar resumen extractivo
|
40 |
return summarizer.summarize(
|
41 |
preprocessor.split_into_sentences(cleaned_text),
|
42 |
preprocessor.clean_sentences(preprocessor.split_into_sentences(cleaned_text)),
|
@@ -46,38 +49,30 @@ def summarize(input_text, file, summary_type, method, num_sentences, model_name,
|
|
46 |
elif summary_type == "Abstractivo":
|
47 |
if model_name not in summarizers:
|
48 |
return "Modelo no disponible para resumen abstractivo."
|
49 |
-
model, tokenizer = summarizers[model_name]
|
50 |
return handle_long_text(
|
51 |
cleaned_text,
|
52 |
-
|
53 |
-
|
54 |
max_length=max_length,
|
55 |
stride=128,
|
56 |
)
|
57 |
|
58 |
-
|
59 |
elif summary_type == "Combinado":
|
60 |
if model_name not in summarizers:
|
61 |
return "Modelo no disponible para resumen abstractivo."
|
62 |
-
|
63 |
-
# Paso 1: Generar puntos clave con el método extractivo
|
64 |
-
extractive_keypoints = TFIDFSummarizer().summarize(
|
65 |
preprocessor.split_into_sentences(cleaned_text),
|
66 |
preprocessor.clean_sentences(preprocessor.split_into_sentences(cleaned_text)),
|
67 |
num_sentences,
|
68 |
)
|
69 |
-
|
70 |
-
# Paso 2: Generar el resumen abstractivo guiado por los puntos clave
|
71 |
-
combined_input = f"{cleaned_text}\n\nPuntos clave: {extractive_keypoints}"
|
72 |
return handle_long_text(
|
73 |
-
|
74 |
-
summarizers[model_name][0],
|
75 |
-
summarizers[model_name][1],
|
76 |
max_length=max_length,
|
77 |
stride=128,
|
78 |
)
|
79 |
|
80 |
-
|
81 |
return "Seleccione un tipo de resumen válido."
|
82 |
|
83 |
# Interfaz dinámica
|
@@ -86,17 +81,36 @@ with gr.Blocks() as interface:
|
|
86 |
|
87 |
# Entrada de texto o archivo
|
88 |
with gr.Row():
|
89 |
-
input_text = gr.Textbox(
|
90 |
file = gr.File(label="Subir archivo (PDF, TXT)")
|
91 |
|
92 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
summary_type = gr.Radio(
|
94 |
["Extractivo", "Abstractivo", "Combinado"],
|
95 |
label="Tipo de resumen",
|
96 |
value="Extractivo",
|
97 |
)
|
98 |
-
|
99 |
-
# Opciones dinámicas
|
100 |
method = gr.Radio(
|
101 |
["TF-IDF", "TextRank", "BERT", "TF-IDF + TextRank"],
|
102 |
label="Método Extractivo",
|
@@ -117,7 +131,6 @@ with gr.Blocks() as interface:
|
|
117 |
1, 10, value=4, step=1, label="Número de haces (Abstractivo)", visible=False
|
118 |
)
|
119 |
|
120 |
-
# Actualización dinámica de opciones
|
121 |
def update_options(summary_type):
|
122 |
if summary_type == "Extractivo":
|
123 |
return (
|
@@ -135,16 +148,14 @@ with gr.Blocks() as interface:
|
|
135 |
gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
|
136 |
gr.update(visible=False))
|
137 |
|
138 |
-
|
139 |
summary_type.change(
|
140 |
update_options,
|
141 |
inputs=[summary_type],
|
142 |
outputs=[method, num_sentences, model_name, max_length, num_beams],
|
143 |
)
|
144 |
|
145 |
-
# Botón para generar resumen
|
146 |
summarize_button = gr.Button("Generar Resumen")
|
147 |
-
output = gr.Textbox(lines=10, label="Resumen generado", interactive=
|
148 |
copy_button = gr.Button("Copiar Resumen")
|
149 |
|
150 |
summarize_button.click(
|
@@ -153,7 +164,6 @@ with gr.Blocks() as interface:
|
|
153 |
outputs=output,
|
154 |
)
|
155 |
|
156 |
-
# Funcionalidad para copiar resumen
|
157 |
def copy_summary(summary):
    """Return the given summary text unchanged."""
    copied = summary
    return copied
|
159 |
|
|
|
3 |
from modules.abstractive import load_summarizers, abstractive_summary
|
4 |
from modules.preprocessing import Preprocessor, PDFProcessor
|
5 |
from modules.utils import handle_long_text
|
|
|
6 |
|
7 |
# Cargar modelos abstractivos finetuneados
|
8 |
summarizers = load_summarizers()
|
9 |
|
10 |
+
# Función para procesar el archivo cargado
|
11 |
+
def process_file(file):
    """Extract the text of an uploaded file.

    Args:
        file: Upload object exposing the path of the stored file through its
            ``name`` attribute, or ``None`` when nothing has been uploaded.

    Returns:
        The text of the file, or a Spanish prompt asking for a valid file
        when ``file`` is ``None``.
    """
    if file is None:
        return "Por favor, cargue un archivo válido."

    # The upload widget is labelled "PDF, TXT", so plain-text uploads are read
    # directly instead of being handed to the PDF parser.
    if file.name.lower().endswith(".txt"):
        # errors="replace" keeps a badly encoded upload from raising.
        with open(file.name, encoding="utf-8", errors="replace") as handle:
            return handle.read()

    return PDFProcessor().pdf_to_text(file.name)
|
17 |
+
|
18 |
# Función principal para generar resúmenes
|
19 |
def summarize(input_text, file, summary_type, method, num_sentences, model_name, max_length, num_beams):
|
20 |
preprocessor = Preprocessor()
|
21 |
|
|
|
22 |
if file is not None:
|
23 |
pdf_processor = PDFProcessor()
|
24 |
input_text = pdf_processor.pdf_to_text(file.name)
|
25 |
|
|
|
26 |
if not input_text:
|
27 |
return "Por favor, ingrese texto o cargue un archivo válido."
|
28 |
|
|
|
29 |
cleaned_text = preprocessor.clean_text(input_text)
|
30 |
|
31 |
if summary_type == "Extractivo":
|
|
|
40 |
else:
|
41 |
return "Método no válido para resumen extractivo."
|
42 |
|
|
|
43 |
return summarizer.summarize(
|
44 |
preprocessor.split_into_sentences(cleaned_text),
|
45 |
preprocessor.clean_sentences(preprocessor.split_into_sentences(cleaned_text)),
|
|
|
49 |
elif summary_type == "Abstractivo":
|
50 |
if model_name not in summarizers:
|
51 |
return "Modelo no disponible para resumen abstractivo."
|
|
|
52 |
return handle_long_text(
|
53 |
cleaned_text,
|
54 |
+
summarizers[model_name][0],
|
55 |
+
summarizers[model_name][1],
|
56 |
max_length=max_length,
|
57 |
stride=128,
|
58 |
)
|
59 |
|
|
|
60 |
elif summary_type == "Combinado":
|
61 |
if model_name not in summarizers:
|
62 |
return "Modelo no disponible para resumen abstractivo."
|
63 |
+
extractive_summary = TFIDFSummarizer().summarize(
|
|
|
|
|
64 |
preprocessor.split_into_sentences(cleaned_text),
|
65 |
preprocessor.clean_sentences(preprocessor.split_into_sentences(cleaned_text)),
|
66 |
num_sentences,
|
67 |
)
|
|
|
|
|
|
|
68 |
return handle_long_text(
|
69 |
+
extractive_summary,
|
70 |
+
summarizers[model_name][0],
|
71 |
+
summarizers[model_name][1],
|
72 |
max_length=max_length,
|
73 |
stride=128,
|
74 |
)
|
75 |
|
|
|
76 |
return "Seleccione un tipo de resumen válido."
|
77 |
|
78 |
# Interfaz dinámica
|
|
|
81 |
|
82 |
# Entrada de texto o archivo
|
83 |
with gr.Row():
|
84 |
+
input_text = gr.Textbox(max_lines=9, label="Ingrese texto", interactive=True)
|
85 |
file = gr.File(label="Subir archivo (PDF, TXT)")
|
86 |
|
87 |
+
# Nuevo botón para cargar el archivo, inicialmente invisible
|
88 |
+
load_file_button = gr.Button("Cargar Archivo", visible=False)
|
89 |
+
|
90 |
+
# Acción del botón: procesar el archivo y colocar el texto en la caja de texto
|
91 |
+
load_file_button.click(
|
92 |
+
process_file,
|
93 |
+
inputs=[file],
|
94 |
+
outputs=[input_text],
|
95 |
+
)
|
96 |
+
|
97 |
+
# Mostrar el botón solo cuando se suba un archivo
|
98 |
+
def toggle_load_button(file):
    """Build the update that makes the load button visible only when a file is set."""
    has_file = file is not None
    return gr.update(visible=has_file)
|
100 |
+
|
101 |
+
file.change(
|
102 |
+
toggle_load_button,
|
103 |
+
inputs=[file],
|
104 |
+
outputs=[load_file_button],
|
105 |
+
)
|
106 |
+
|
107 |
+
# Selección de tipo de resumen y opciones dinámicas
|
108 |
summary_type = gr.Radio(
|
109 |
["Extractivo", "Abstractivo", "Combinado"],
|
110 |
label="Tipo de resumen",
|
111 |
value="Extractivo",
|
112 |
)
|
113 |
+
|
|
|
114 |
method = gr.Radio(
|
115 |
["TF-IDF", "TextRank", "BERT", "TF-IDF + TextRank"],
|
116 |
label="Método Extractivo",
|
|
|
131 |
1, 10, value=4, step=1, label="Número de haces (Abstractivo)", visible=False
|
132 |
)
|
133 |
|
|
|
134 |
def update_options(summary_type):
|
135 |
if summary_type == "Extractivo":
|
136 |
return (
|
|
|
148 |
gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
|
149 |
gr.update(visible=False))
|
150 |
|
|
|
151 |
summary_type.change(
|
152 |
update_options,
|
153 |
inputs=[summary_type],
|
154 |
outputs=[method, num_sentences, model_name, max_length, num_beams],
|
155 |
)
|
156 |
|
|
|
157 |
summarize_button = gr.Button("Generar Resumen")
|
158 |
+
output = gr.Textbox(lines=10, label="Resumen generado", interactive=False)
|
159 |
copy_button = gr.Button("Copiar Resumen")
|
160 |
|
161 |
summarize_button.click(
|
|
|
164 |
outputs=output,
|
165 |
)
|
166 |
|
|
|
167 |
def copy_summary(summary):
    """Return the given summary text unchanged."""
    copied = summary
    return copied
|
169 |
|