dexttttrees committed on
Commit
03b1bed
verified
1 Parent(s): 8487a2e

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +140 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import nltk
3
+ from nltk.corpus import cess_esp, conll2002
4
+ from nltk.tokenize import word_tokenize
5
+ import stylecloud
6
+ import matplotlib.pyplot as plt
7
+ from fpdf import FPDF
8
+ import re
9
+ from collections import Counter
10
+ import spacy
11
+ import random
12
+ import csv
13
+
14
# Download the NLTK resources needed below: tokenizers, the Spanish
# stopword list, and the two Spanish corpora used for example sentences.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('cess_esp')
nltk.download('conll2002')

# Load the medium Spanish spaCy model (used for lemmatization and POS tags).
nlp = spacy.load('es_core_news_md')

# Extra stopwords applied on top of NLTK's Spanish stopword list.
additional_stopwords = [
    # Add more stopwords here if desired
]
26
+
27
# Preprocessing: normalize the text and keep only lemmas of content words.
def preprocess_text(text):
    """Return lemmas of Spanish content words (verbs/adjectives/nouns)
    in *text*, excluding stopwords."""
    normalized = re.sub(r'\W', ' ', text.lower())
    stop_words = set(nltk.corpus.stopwords.words('spanish'))
    stop_words.update(additional_stopwords)
    kept_pos = ('VERB', 'ADJ', 'NOUN')
    lemmas = []
    for token in nlp(normalized):
        if token.text in stop_words:
            continue
        if token.pos_ in kept_pos:
            lemmas.append(token.lemma_)
    return lemmas
35
+
36
# Collect example sentences containing a word from multiple NLTK corpora.
def get_example_sentences(word, num_sentences=1):
    """Return up to *num_sentences* sentences from cess_esp/conll2002
    that contain *word* as a token.

    Single-character words never match; that guard was loop-invariant in
    the original but evaluated per sentence (scanning both corpora for
    nothing), so it is hoisted to an early return here.
    """
    sentences = []
    if len(word) <= 1:
        return sentences
    for corpus in (cess_esp, conll2002):
        for sent in corpus.sents():
            if word in sent:
                sentences.append(' '.join(sent))
                # Early return replaces the original's nested double break.
                if len(sentences) >= num_sentences:
                    return sentences
    return sentences
48
+
49
# Build a word cloud shaped by a randomly picked icon.
def generate_random_style_cloud(words, filename):
    """Render *words* as a stylecloud PNG saved to *filename*, then display it."""
    icon_choices = (
        'fas fa-cloud',
        'fas fa-star',
        'fas fa-heart',
        'fas fa-tree',
        'fas fa-sun',
        'fas fa-moon',
    )
    stylecloud.gen_stylecloud(
        text=' '.join(words),
        icon_name=random.choice(icon_choices),
        output_name=filename,
    )
    # Read the file back and show it (typically a no-op on headless servers).
    plt.imshow(plt.imread(filename))
    plt.axis('off')
    plt.show()
59
+
60
# PDF document class: decorative side bands plus a page-number footer.
class PDF(FPDF):
    """FPDF subclass drawing light-blue bands on both page edges and a
    centered page number at the bottom."""

    def header(self):
        # Filled 10 mm band on each vertical edge (A4 height is 297 mm).
        self.set_fill_color(200, 220, 255)
        for x_pos in (0, 200):
            self.rect(x_pos, 0, 10, 297, 'F')

    def footer(self):
        # 15 mm above the bottom edge, small italic, centered page number.
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')
71
+
72
def add_text_to_pdf(pdf, text, title):
    """Add a word-cloud page and an example-sentence table for *text* to *pdf*.

    Side effects: writes ``word_freq_<title>.csv`` and
    ``wordcloud_<title>.png`` into the working directory.
    """
    filtered_words = preprocess_text(text)
    word_freq = Counter(filtered_words)
    word_freq_file = f"word_freq_{title}.csv"

    # newline='' per the csv module docs (prevents blank rows on Windows);
    # explicit UTF-8 so accented lemmas survive the round trip.
    with open(word_freq_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['word', 'frequency'])
        for word, freq in word_freq.items():
            writer.writerow([word, freq])

    cloud_filename = f'wordcloud_{title}.png'
    generate_random_style_cloud(filtered_words, cloud_filename)

    # Page 1: title, rule, and the word-cloud image.
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, title, ln=True, align='C')
    pdf.set_draw_color(0, 0, 0)
    pdf.set_line_width(0.5)
    pdf.line(10, 25, 200, 25)
    pdf.image(cloud_filename, x=15, y=30, w=180)

    # Page 2: example-sentence table for the 20 most frequent words.
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, "Oraciones de ejemplo", ln=True, align='C')

    high_freq_words = sorted(word.upper() for word, freq in word_freq.most_common(20))

    pdf.set_font('Arial', 'B', 12)
    pdf.set_fill_color(200, 200, 200)
    pdf.cell(90, 10, 'PALABRA', 1, fill=True)
    # Bug fix: this header string previously contained a mis-encoded 'Ó'
    # (mojibake) that classic FPDF's latin-1 encoder cannot emit.
    pdf.cell(0, 10, 'ORACIÓN DE EJEMPLO', 1, fill=True)
    pdf.ln()

    pdf.set_font('Arial', '', 12)
    pdf.set_line_width(0.1)
    for word in high_freq_words:
        example_sent = get_example_sentences(word.lower())
        if not example_sent:
            continue
        # Bug fix: a single case-insensitive substitution. The original
        # chained .replace() calls re-matched the word inside the marker it
        # had just inserted, producing '****WORD****'.
        example_sentence = re.sub(
            re.escape(word), f'**{word}**', example_sent[0], flags=re.IGNORECASE
        )
        pdf.cell(90, 10, word, 1)
        pdf.set_font('Arial', '', 10)
        pdf.multi_cell(0, 10, example_sentence, 1)
        pdf.set_font('Arial', 'I', 8)
        pdf.cell(90, 10, '', 0)
        pdf.cell(0, 10, 'Fuente: NLTK', 0)
        pdf.set_font('Arial', '', 12)
        pdf.ln()
122
+
123
# Main entry point wired to the Gradio interface.
def create_pdf_from_text(text, title):
    """Build the word-cloud/example PDF for *text* and return its filename."""
    document = PDF()
    add_text_to_pdf(document, text, title)
    output_path = f'{title}.pdf'
    document.output(output_path)
    return output_path
130
+
131
# Gradio interface.
# Bug fix: the gr.inputs / gr.outputs namespaces were deprecated in Gradio 2
# and removed in modern releases — with the unpinned `gradio` requirement a
# fresh install raised AttributeError. Use the top-level components instead.
# Labels also had mojibake ('Espa帽ol', 'T铆tulo') which is repaired here.
iface = gr.Interface(
    fn=create_pdf_from_text,
    inputs=[
        gr.Textbox(lines=10, label="Texto en Español"),
        gr.Textbox(label="Título"),
    ],
    outputs=gr.File(label="Descargar PDF"),
    title="Generador de PDFs con Nubes de Palabras",
)
138
+
139
# Launch the web UI only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ nltk
3
+ stylecloud
4
+ matplotlib
5
+ fpdf
6
+ spacy
7
+ es_core_news_md @ https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.2.0/es_core_news_md-3.2.0.tar.gz