Spaces:
Build error
Build error
Upload 2 files
Browse files- app.py +140 -0
- requirements.txt +7 -0
app.py
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import gradio as gr
import nltk
from nltk.corpus import cess_esp, conll2002
from nltk.tokenize import word_tokenize
import stylecloud
import matplotlib.pyplot as plt
from fpdf import FPDF
import re
from collections import Counter
import spacy
import random
import csv

# Download the NLTK resources the app needs: tokenizer models, the Spanish
# stopword list, and the two Spanish corpora used for example sentences.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('cess_esp')
nltk.download('conll2002')

# Load the spaCy model for Spanish (used below for POS tags and lemmas).
nlp = spacy.load('es_core_news_md')

# Extra stopwords on top of NLTK's Spanish list.
additional_stopwords = [
    # You can add more stopwords here if you wish
]
|
26 |
+
|
27 |
+
# Preprocessing: lowercase, strip non-word characters, keep content-word lemmas.
def preprocess_text(text):
    """Return the lemmas of the content words (verbs/adjectives/nouns) in *text*.

    The text is lowercased and every non-word character is replaced by a
    space before it goes through the spaCy pipeline.  Tokens whose surface
    form appears in the Spanish stopword list (NLTK plus
    ``additional_stopwords``) are dropped.
    """
    normalized = re.sub(r'\W', ' ', text.lower())
    stop_words = set(nltk.corpus.stopwords.words('spanish'))
    stop_words.update(additional_stopwords)
    kept_pos = ('VERB', 'ADJ', 'NOUN')
    lemmas = []
    for token in nlp(normalized):
        if token.text in stop_words:
            continue
        if token.pos_ in kept_pos:
            lemmas.append(token.lemma_)
    return lemmas
|
35 |
+
|
36 |
+
# Fetch example sentences containing *word* from the NLTK Spanish corpora.
def get_example_sentences(word, num_sentences=1):
    """Return up to *num_sentences* corpus sentences that contain *word*.

    Searches ``cess_esp`` first, then ``conll2002``, joining each matching
    token list into a single string.  Words of length <= 1 are rejected up
    front: the original per-sentence ``len(word) > 1`` test was
    loop-invariant, so one-character lookups scanned both corpora in full
    while never collecting anything.
    """
    sentences = []
    if len(word) <= 1:  # invariant hoisted out of the corpus scan
        return sentences
    for corpus in (cess_esp, conll2002):
        for sent in corpus.sents():
            if word in sent:
                sentences.append(' '.join(sent))
                # Returning directly replaces the original nested
                # double-break; the collected result is identical.
                if len(sentences) >= num_sentences:
                    return sentences
    return sentences
|
48 |
+
|
49 |
+
# Render a word cloud with a randomly chosen icon shape and display it.
def generate_random_style_cloud(words, filename):
    """Write a stylecloud PNG for *words* to *filename* and display it.

    The cloud's mask shape is picked at random from a small set of Font
    Awesome icon names; the generated image is then read back and shown
    through matplotlib with the axes hidden.
    """
    icons = ['fas fa-cloud', 'fas fa-star', 'fas fa-heart', 'fas fa-tree', 'fas fa-sun', 'fas fa-moon']
    stylecloud.gen_stylecloud(
        text=' '.join(words),
        icon_name=random.choice(icons),
        output_name=filename,
    )
    rendered = plt.imread(filename)
    plt.imshow(rendered)
    plt.axis('off')
    plt.show()
|
59 |
+
|
60 |
+
# PDF layout: light-blue side bars on every page plus a centred page number.
class PDF(FPDF):
    """FPDF subclass that draws decorative margins and a page-number footer."""

    def header(self):
        # Filled 10mm-wide vertical bars down both edges of the A4 page
        # (page height 297mm; right bar sits at x=200 on the 210mm width).
        self.set_fill_color(200, 220, 255)
        for bar_x in (0, 200):
            self.rect(bar_x, 0, 10, 297, 'F')

    def footer(self):
        # Centred "Page N" label 15mm above the bottom edge.
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')
|
71 |
+
|
72 |
+
def add_text_to_pdf(pdf, text, title):
|
73 |
+
filtered_words = preprocess_text(text)
|
74 |
+
word_freq = Counter(filtered_words)
|
75 |
+
word_freq_file = f"word_freq_{title}.csv"
|
76 |
+
|
77 |
+
with open(word_freq_file, 'w') as f:
|
78 |
+
writer = csv.writer(f)
|
79 |
+
writer.writerow(['word', 'frequency'])
|
80 |
+
for word, freq in word_freq.items():
|
81 |
+
writer.writerow([word, freq])
|
82 |
+
|
83 |
+
cloud_filename = f'wordcloud_{title}.png'
|
84 |
+
generate_random_style_cloud(filtered_words, cloud_filename)
|
85 |
+
|
86 |
+
pdf.add_page()
|
87 |
+
pdf.set_font('Arial', 'B', 16)
|
88 |
+
pdf.cell(0, 10, title, ln=True, align='C')
|
89 |
+
pdf.set_draw_color(0, 0, 0)
|
90 |
+
pdf.set_line_width(0.5)
|
91 |
+
pdf.line(10, 25, 200, 25)
|
92 |
+
pdf.image(cloud_filename, x=15, y=30, w=180)
|
93 |
+
|
94 |
+
pdf.add_page()
|
95 |
+
pdf.set_font('Arial', 'B', 16)
|
96 |
+
pdf.cell(0, 10, "Oraciones de ejemplo", ln=True, align='C')
|
97 |
+
|
98 |
+
high_freq_words = sorted([word.upper() for word, freq in word_freq.most_common(20)])
|
99 |
+
|
100 |
+
pdf.set_font('Arial', 'B', 12)
|
101 |
+
pdf.set_fill_color(200, 200, 200)
|
102 |
+
pdf.cell(90, 10, 'PALABRA', 1, fill=True)
|
103 |
+
pdf.cell(0, 10, 'ORACI脫N DE EJEMPLO', 1, fill=True)
|
104 |
+
pdf.ln()
|
105 |
+
|
106 |
+
pdf.set_font('Arial', '', 12)
|
107 |
+
pdf.set_line_width(0.1)
|
108 |
+
for word in high_freq_words:
|
109 |
+
example_sent = get_example_sentences(word.lower())
|
110 |
+
if example_sent:
|
111 |
+
example_sentence = example_sent[0].replace(word.lower(), f'**{word}**').replace(word, f'**{word}**')
|
112 |
+
pdf.cell(90, 10, word, 1)
|
113 |
+
pdf.set_font('Arial', '', 10)
|
114 |
+
pdf.multi_cell(0, 10, example_sentence, 1)
|
115 |
+
pdf.set_font('Arial', 'I', 8)
|
116 |
+
pdf.cell(90, 10, '', 0)
|
117 |
+
pdf.cell(0, 10, 'Fuente: NLTK', 0)
|
118 |
+
pdf.set_font('Arial', '', 12)
|
119 |
+
else:
|
120 |
+
continue
|
121 |
+
pdf.ln()
|
122 |
+
|
123 |
+
# Main callback for the Gradio interface: build the PDF and return its path.
def create_pdf_from_text(text, title):
    """Generate ``<title>.pdf`` from *text* and return the file name.

    *title* comes straight from a public web form, so it is sanitized
    before being used as a file name: anything outside word characters,
    hyphens and spaces becomes '_', preventing path traversal ('../..')
    and invalid-path errors.  Ordinary titles produce the same file name
    as before.
    """
    pdf = PDF()
    add_text_to_pdf(pdf, text, title)
    safe_title = re.sub(r'[^\w\- ]', '_', title).strip() or 'documento'
    pdf_filename = f'{safe_title}.pdf'
    pdf.output(pdf_filename)
    return pdf_filename
|
130 |
+
|
131 |
+
# Gradio UI: two text inputs (essay text + title) -> downloadable PDF.
# The gr.inputs / gr.outputs namespaces were deprecated in Gradio 3 and
# removed in Gradio 4 — that removal is what breaks the Space build — so
# the top-level components are used instead (valid on 3.x and 4.x).
# Labels restored from mojibake ('Espa帽ol' -> 'Español').
iface = gr.Interface(
    fn=create_pdf_from_text,
    inputs=[
        gr.Textbox(lines=10, label="Texto en Español"),
        gr.Textbox(label="Título"),
    ],
    outputs=gr.File(label="Descargar PDF"),
    title="Generador de PDFs con Nubes de Palabras",
)

if __name__ == "__main__":
    iface.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio<4
|
2 |
+
nltk
|
3 |
+
stylecloud
|
4 |
+
matplotlib
|
5 |
+
fpdf
|
6 |
+
spacy
|
7 |
+
es_core_news_md @ https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.2.0/es_core_news_md-3.2.0.tar.gz
|