"""Gradio app: build a PDF report (stylecloud + example sentences) from Spanish text.

The user supplies a text and a title; the app lemmatises the text with spaCy,
renders a word cloud, dumps word frequencies to CSV, and assembles a PDF with
the cloud plus one corpus example sentence per frequent word.
"""

import csv
import random
import re
from collections import Counter

import gradio as gr
import matplotlib.pyplot as plt
import nltk
import spacy
import stylecloud
from fpdf import FPDF
from nltk.corpus import cess_esp, conll2002
from nltk.tokenize import word_tokenize  # kept: may be used by other consumers of this module

# Download the NLTK resources the functions below rely on.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('cess_esp')
nltk.download('conll2002')

# Spanish spaCy model used for lemmatisation and POS tagging.
nlp = spacy.load('es_core_news_md')

additional_stopwords = [
    # Add extra stopwords here if desired.
]


def preprocess_text(text):
    """Return the content-word lemmas of ``text``.

    Lower-cases, replaces non-word characters with spaces, then keeps only
    lemmas of verbs, adjectives and nouns that are not Spanish stopwords.
    """
    text = re.sub(r'\W', ' ', text.lower())
    doc = nlp(text)
    stop_words = set(nltk.corpus.stopwords.words('spanish')).union(additional_stopwords)
    return [
        token.lemma_
        for token in doc
        if token.text not in stop_words and token.pos_ in ('VERB', 'ADJ', 'NOUN')
    ]


def get_example_sentences(word, num_sentences=1):
    """Return up to ``num_sentences`` corpus sentences containing ``word``.

    Searches cess_esp first, then conll2002. Single-character words are
    rejected up front (the original re-tested ``len(word) > 1`` on every
    sentence inside the loop).
    """
    if len(word) <= 1:
        return []
    sentences = []
    for corpus in (cess_esp, conll2002):
        for sent in corpus.sents():
            if word in sent:
                sentences.append(' '.join(sent))
                if len(sentences) >= num_sentences:
                    return sentences
    return sentences


def generate_random_style_cloud(words, filename):
    """Render ``words`` as a stylecloud PNG at ``filename`` with a random icon."""
    icons = ['fas fa-cloud', 'fas fa-star', 'fas fa-heart',
             'fas fa-tree', 'fas fa-sun', 'fas fa-moon']
    stylecloud.gen_stylecloud(
        text=' '.join(words),
        icon_name=random.choice(icons),
        output_name=filename,
    )
    # Fix: the original called plt.show() here, which blocks (or is a no-op on
    # a headless server) when invoked from the Gradio request handler. The
    # preview figure is drawn and closed instead; the PNG on disk is the output.
    img = plt.imread(filename)
    plt.imshow(img)
    plt.axis('off')
    plt.close()


class PDF(FPDF):
    """FPDF subclass with decorative side bars and a page-number footer."""

    def header(self):
        # Light-blue vertical bars on both page edges (A4 is 297 mm tall).
        self.set_fill_color(200, 220, 255)
        self.rect(0, 0, 10, 297, 'F')
        self.rect(200, 0, 10, 297, 'F')

    def footer(self):
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')


def add_text_to_pdf(pdf, text, title):
    """Append a word-cloud page and an example-sentence table to ``pdf``.

    Side effects: writes ``word_freq_{title}.csv`` and ``wordcloud_{title}.png``
    to the working directory.
    """
    filtered_words = preprocess_text(text)
    word_freq = Counter(filtered_words)

    # Fix: newline='' is required by the csv module; utf-8 keeps accents intact.
    word_freq_file = f"word_freq_{title}.csv"
    with open(word_freq_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['word', 'frequency'])
        for word, freq in word_freq.items():
            writer.writerow([word, freq])

    cloud_filename = f'wordcloud_{title}.png'
    generate_random_style_cloud(filtered_words, cloud_filename)

    # Page 1: title + word cloud image.
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, title, ln=True, align='C')
    pdf.set_draw_color(0, 0, 0)
    pdf.set_line_width(0.5)
    pdf.line(10, 25, 200, 25)
    pdf.image(cloud_filename, x=15, y=30, w=180)

    # Page 2+: table of the 20 most frequent words with one example sentence each.
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, "Oraciones de ejemplo", ln=True, align='C')

    high_freq_words = sorted(word.upper() for word, freq in word_freq.most_common(20))

    pdf.set_font('Arial', 'B', 12)
    pdf.set_fill_color(200, 200, 200)
    pdf.cell(90, 10, 'PALABRA', 1, fill=True)
    pdf.cell(0, 10, 'ORACIÓN DE EJEMPLO', 1, fill=True)
    pdf.ln()

    pdf.set_font('Arial', '', 12)
    pdf.set_line_width(0.1)
    for word in high_freq_words:
        example_sent = get_example_sentences(word.lower())
        if not example_sent:
            continue
        # Fix: the original chained two str.replace calls; the second one
        # re-matched the uppercase word inside the freshly inserted marker and
        # produced ****WORD****. A single case-insensitive sub highlights once.
        example_sentence = re.sub(
            re.escape(word), f'**{word}**', example_sent[0], flags=re.IGNORECASE
        )
        pdf.cell(90, 10, word, 1)
        pdf.set_font('Arial', '', 10)
        pdf.multi_cell(0, 10, example_sentence, 1)
        pdf.set_font('Arial', 'I', 8)
        pdf.cell(90, 10, '', 0)
        pdf.cell(0, 10, 'Fuente: NLTK', 0)
        pdf.set_font('Arial', '', 12)
        pdf.ln()


def create_pdf_from_text(text, title):
    """Gradio entry point: build the PDF and return its filename."""
    pdf = PDF()
    add_text_to_pdf(pdf, text, title)
    pdf_filename = f'{title}.pdf'
    pdf.output(pdf_filename)
    return pdf_filename


# Fix: gr.inputs / gr.outputs is the removed Gradio 2.x namespace API;
# modern Gradio exposes the components at the top level.
iface = gr.Interface(
    fn=create_pdf_from_text,
    inputs=[
        gr.Textbox(lines=10, label="Texto en Español"),
        gr.Textbox(label="Título"),
    ],
    outputs=gr.File(label="Descargar PDF"),
    title="Generador de PDFs con Nubes de Palabras",
)

if __name__ == "__main__":
    iface.launch()