Spaces:
Build error
Build error
import gradio as gr | |
import nltk | |
from nltk.corpus import cess_esp, conll2002 | |
from nltk.tokenize import word_tokenize | |
import stylecloud | |
import matplotlib.pyplot as plt | |
from fpdf import FPDF | |
import re | |
from collections import Counter | |
import spacy | |
import random | |
import csv | |
# Download the NLTK resources this script needs.
for _resource in ('punkt', 'stopwords', 'cess_esp', 'conll2002'):
    nltk.download(_resource)

# Load the spaCy model for Spanish.
nlp = spacy.load('es_core_news_md')

# Extra stop words, merged with NLTK's Spanish list in preprocess_text().
# Add more entries here if desired.
additional_stopwords = []
# Preprocessing function
def preprocess_text(text):
    """Return the lemmas of the content words found in *text*.

    The text is lower-cased, every non-word character is replaced by a
    space, and the result is run through the module-level spaCy pipeline
    ``nlp``. A token is kept only when its text is not a stop word (NLTK's
    Spanish list plus ``additional_stopwords``) and its part of speech is
    VERB, ADJ or NOUN.

    Args:
        text: Raw input string.

    Returns:
        A list of lemma strings, in document order, duplicates included.
    """
    normalized = re.sub(r'\W', ' ', text.lower())
    doc = nlp(normalized)

    excluded = set(nltk.corpus.stopwords.words('spanish')) | set(additional_stopwords)
    content_tags = ['VERB', 'ADJ', 'NOUN']

    lemmas = []
    for token in doc:
        if token.text in excluded:
            continue
        if token.pos_ in content_tags:
            lemmas.append(token.lemma_)
    return lemmas
# Collect example sentences from several NLTK corpora
def get_example_sentences(word, num_sentences=1, corpora=None):
    """Return up to *num_sentences* corpus sentences that contain *word*.

    A sentence matches when *word* is equal to one of its tokens (exact,
    case-sensitive membership test, not a substring search). Matching
    sentences are returned as strings with their tokens joined by single
    spaces, in the order they are encountered.

    Args:
        word: Token to look for. Words of length 0 or 1 never match.
        num_sentences: Maximum number of sentences to return. Values below
            1 yield an empty list.
        corpora: Optional iterable of objects exposing a ``sents()`` method
            that yields token lists. Defaults to ``(cess_esp, conll2002)``.

    Returns:
        A list of at most *num_sentences* sentence strings.
    """
    # These checks do not depend on the sentence, so decide once up front
    # instead of walking every corpus just to reject each sentence.
    if len(word) <= 1 or num_sentences < 1:
        return []

    if corpora is None:
        corpora = (cess_esp, conll2002)

    sentences = []
    for corpus in corpora:
        for sent in corpus.sents():
            if word in sent:
                sentences.append(' '.join(sent))
                if len(sentences) >= num_sentences:
                    # Quota reached: leave both loops at once.
                    return sentences
    return sentences
# Generate the word cloud with a randomly chosen style
def generate_random_style_cloud(words, filename):
    """Render *words* as a stylecloud image saved to *filename*, then show it.

    The words are joined with spaces and handed to
    ``stylecloud.gen_stylecloud`` together with an icon name picked at
    random from a fixed list. The written image is then read back and
    displayed through matplotlib with the axes hidden.

    Args:
        words: Iterable of strings to plot.
        filename: Path of the image file to write.
    """
    icon_choices = [
        'fas fa-cloud',
        'fas fa-star',
        'fas fa-heart',
        'fas fa-tree',
        'fas fa-sun',
        'fas fa-moon',
    ]
    stylecloud.gen_stylecloud(
        text=' '.join(words),
        icon_name=random.choice(icon_choices),
        output_name=filename,
    )

    # Read the generated file back and display it without axes.
    plt.imshow(plt.imread(filename))
    plt.axis('off')
    plt.show()
# PDF document class
class PDF(FPDF):
    """FPDF subclass with coloured side bands and a page-number footer."""

    def header(self):
        """Fill a 10 x 297 band at x=0 and another at x=200.

        NOTE(review): the numbers presumably target an A4 page in
        millimetres (the FPDF defaults) - confirm if the page format changes.
        """
        self.set_fill_color(200, 220, 255)
        for left_edge in (0, 200):
            self.rect(left_edge, 0, 10, 297, 'F')

    def footer(self):
        """Write the centred page number in 8 pt italic Arial."""
        self.set_y(-15)  # negative offset: measured from the bottom of the page
        self.set_font('Arial', 'I', 8)
        page_label = f'Page {self.page_no()}'
        self.cell(0, 10, page_label, 0, 0, 'C')
def _latin1_safe(text):
    """Replace characters that Latin-1 cannot encode with '?'.

    The built-in 'Arial' font used below is limited to Latin-1, so any
    other character would make PDF generation fail.
    """
    return text.encode('latin-1', 'replace').decode('latin-1')


def _highlight_word(sentence, word):
    """Wrap every whole-word occurrence of *word* in *sentence* as ``**word**``.

    Matching ignores case and the replacement always uses *word* as given.
    Everything happens in a single pass, so text that was already wrapped
    is never wrapped a second time.
    """
    marked = f'**{word}**'
    pattern = rf'(?<!\w){re.escape(word)}(?!\w)'
    # A callable replacement keeps backslashes in `marked` literal.
    return re.sub(pattern, lambda _match: marked, sentence, flags=re.IGNORECASE)


def add_text_to_pdf(pdf, text, title):
    """Add a word-cloud page and an example-sentence page for *text* to *pdf*.

    Steps:
      1. Preprocess *text* and count lemma frequencies.
      2. Write the frequencies to ``word_freq_<title>.csv``.
      3. Render the word cloud to ``wordcloud_<title>.png``.
      4. Add a page with *title*, a rule and the cloud image.
      5. Add a page with a two-column table: each of the 20 most frequent
         words (upper-cased, alphabetical) next to one corpus sentence that
         contains it. Words with no example sentence are skipped.

    Both files are written to the current working directory. Characters in
    *title* that are unsafe in file names are replaced by ``_`` for the file
    names only; the page heading shows the title itself.

    Args:
        pdf: FPDF-like document to draw on.
        text: Source text to analyse.
        title: Heading of the first page and stem of the generated files.

    Raises:
        ValueError: If preprocessing leaves no words to plot.
    """
    filtered_words = preprocess_text(text)
    if not filtered_words:
        # Fail early with a clear message instead of letting the word-cloud
        # renderer fail on empty input after files were already written.
        raise ValueError(
            'El texto no contiene palabras de contenido '
            '(verbos, adjetivos o sustantivos) para analizar.'
        )
    word_freq = Counter(filtered_words)

    # `title` is user input: neutralise path separators and other characters
    # that are invalid in file names before building paths from it.
    file_stem = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title).strip(' .') or 'documento'

    word_freq_file = f"word_freq_{file_stem}.csv"
    # newline='' is what the csv module expects (no blank rows on Windows);
    # UTF-8 keeps accented Spanish words intact on every platform.
    with open(word_freq_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['word', 'frequency'])
        writer.writerows(word_freq.items())

    cloud_filename = f'wordcloud_{file_stem}.png'
    generate_random_style_cloud(filtered_words, cloud_filename)

    # Page 1: title, horizontal rule, word-cloud image.
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, _latin1_safe(title), ln=True, align='C')
    pdf.set_draw_color(0, 0, 0)
    pdf.set_line_width(0.5)
    pdf.line(10, 25, 200, 25)
    pdf.image(cloud_filename, x=15, y=30, w=180)

    # Page 2: table of example sentences.
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, "Oraciones de ejemplo", ln=True, align='C')
    high_freq_words = sorted(word.upper() for word, _ in word_freq.most_common(20))

    # Table header row.
    pdf.set_font('Arial', 'B', 12)
    pdf.set_fill_color(200, 200, 200)
    pdf.cell(90, 10, 'PALABRA', 1, fill=True)
    pdf.cell(0, 10, 'ORACIÓN DE EJEMPLO', 1, fill=True)
    pdf.ln()

    pdf.set_font('Arial', '', 12)
    pdf.set_line_width(0.1)
    for word in high_freq_words:
        example_sent = get_example_sentences(word.lower())
        if not example_sent:
            continue
        example_sentence = _highlight_word(example_sent[0], word)

        pdf.cell(90, 10, _latin1_safe(word), 1)
        pdf.set_font('Arial', '', 10)
        pdf.multi_cell(0, 10, _latin1_safe(example_sentence), 1)
        # Source attribution under the sentence, aligned with the second column.
        pdf.set_font('Arial', 'I', 8)
        pdf.cell(90, 10, '', 0)
        pdf.cell(0, 10, 'Fuente: NLTK', 0)
        pdf.set_font('Arial', '', 12)
        pdf.ln()
# Main entry point used by the Gradio interface
def create_pdf_from_text(text, title):
    """Build the PDF report for *text* and return the path of the written file.

    A fresh ``PDF`` document is filled by ``add_text_to_pdf`` and saved in
    the current working directory as ``<title>.pdf``.

    *title* comes from the user, so characters that are unsafe in a file
    name (path separators, control characters, ``: * ? " < > |``) are
    replaced by ``_`` when building the output path. A title that is empty
    after cleaning falls back to ``documento``.

    Args:
        text: Source text to analyse.
        title: Document title, also used as the file-name stem.

    Returns:
        The name of the PDF file that was written.
    """
    pdf = PDF()
    add_text_to_pdf(pdf, text, title)

    # Never let user input choose the directory the file is written to.
    safe_stem = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title).strip(' .') or 'documento'
    pdf_filename = f'{safe_stem}.pdf'
    pdf.output(pdf_filename)
    return pdf_filename
# Gradio interface
# Components are taken from the top-level `gr` namespace: the legacy
# `gr.inputs` / `gr.outputs` modules are deprecated and absent from current
# Gradio releases.
iface = gr.Interface(
    fn=create_pdf_from_text,
    inputs=[
        gr.Textbox(lines=10, label="Texto en Español"),
        gr.Textbox(label="Título"),
    ],
    outputs=gr.File(label="Descargar PDF"),
    title="Generador de PDFs con Nubes de Palabras",
)

if __name__ == "__main__":
    iface.launch()