# Source extraction artifact removed (file-size header; commit 03b1bed).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
import nltk
from nltk.corpus import cess_esp, conll2002
from nltk.tokenize import word_tokenize
import stylecloud
import matplotlib.pyplot as plt
from fpdf import FPDF
import re
from collections import Counter
import spacy
import random
import csv

# Fetch the NLTK data this script relies on (tokenizer, stopwords, corpora).
for _resource in ('punkt', 'stopwords', 'cess_esp', 'conll2002'):
    nltk.download(_resource)

# Spanish spaCy pipeline used for lemmatization and POS tagging.
nlp = spacy.load('es_core_news_md')

# Extra stopwords filtered out in addition to NLTK's Spanish list.
additional_stopwords = []

# Only content words are kept when building the word cloud / frequency table.
_CONTENT_POS = frozenset({'VERB', 'ADJ', 'NOUN'})

def preprocess_text(text):
    """Lowercase *text*, strip non-word characters, lemmatize with spaCy,
    and return the lemmas of content words (verbs, adjectives, nouns)
    that are not Spanish stopwords.

    Args:
        text: Raw Spanish input text.

    Returns:
        list[str]: Filtered lemmas, in order of appearance.
    """
    text = re.sub(r'\W', ' ', text.lower())
    doc = nlp(text)
    # Stopword set combines NLTK's list with the module-level extras.
    stop_words = set(nltk.corpus.stopwords.words('spanish')).union(additional_stopwords)
    return [
        token.lemma_
        for token in doc
        if token.text not in stop_words and token.pos_ in _CONTENT_POS
    ]

# Obtain example sentences from multiple NLTK corpora
def get_example_sentences(word, num_sentences=1):
    """Search the cess_esp and conll2002 corpora for sentences containing
    *word* and return up to *num_sentences* of them, each joined into a
    single string.

    Args:
        word: Token to look for (exact match against corpus tokens).
        num_sentences: Maximum number of sentences to return.

    Returns:
        list[str]: Matching sentences (possibly empty).
    """
    sentences = []
    # Guard clauses: the original re-tested len(word) > 1 for every
    # sentence even though it is loop-invariant; num_sentences <= 0
    # short-circuits exactly as the original's post-iteration check did.
    if num_sentences <= 0 or len(word) <= 1:
        return sentences
    for corpus in (cess_esp, conll2002):
        for sent in corpus.sents():
            if word in sent:
                sentences.append(' '.join(sent))
                if len(sentences) >= num_sentences:
                    return sentences
    return sentences

# Generate a word cloud with a randomly chosen shape
def generate_random_style_cloud(words, filename):
    """Render *words* as a stylecloud PNG saved to *filename*, using a
    randomly picked Font Awesome icon as the cloud shape, then display
    the resulting image with matplotlib."""
    icon_choices = (
        'fas fa-cloud', 'fas fa-star', 'fas fa-heart',
        'fas fa-tree', 'fas fa-sun', 'fas fa-moon',
    )
    stylecloud.gen_stylecloud(
        text=' '.join(words),
        icon_name=random.choice(icon_choices),
        output_name=filename,
    )
    # Load the generated file and show it (useful when running locally).
    rendered = plt.imread(filename)
    plt.imshow(rendered)
    plt.axis('off')
    plt.show()

# PDF document class
class PDF(FPDF):
    """FPDF subclass that draws light-blue bands down both page edges
    and a centered page number in the footer."""

    def header(self):
        # Vertical decorative bands at the left and right edges (A4: 297mm tall).
        self.set_fill_color(200, 220, 255)
        for band_x in (0, 200):
            self.rect(band_x, 0, 10, 297, 'F')

    def footer(self):
        # Page number, 15mm from the bottom, centered.
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')

def add_text_to_pdf(pdf, text, title):
    """Add two pages to *pdf*: one with a word cloud built from the
    preprocessed *text*, and one with a table of example sentences for
    the 20 most frequent words.

    Side effects: writes ``word_freq_<title>.csv`` and
    ``wordcloud_<title>.png`` into the working directory.

    Args:
        pdf: An open PDF/FPDF instance to append pages to.
        text: Raw Spanish input text.
        title: Section title; also embedded in the artifact filenames.
    """
    filtered_words = preprocess_text(text)
    word_freq = Counter(filtered_words)

    # Dump the frequency table. newline='' is required by the csv module
    # (avoids blank rows on Windows); utf-8 handles accented Spanish words.
    word_freq_file = f"word_freq_{title}.csv"
    with open(word_freq_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['word', 'frequency'])
        writer.writerows(word_freq.items())

    cloud_filename = f'wordcloud_{title}.png'
    generate_random_style_cloud(filtered_words, cloud_filename)

    # Page 1: title, separator line, word-cloud image.
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, title, ln=True, align='C')
    pdf.set_draw_color(0, 0, 0)
    pdf.set_line_width(0.5)
    pdf.line(10, 25, 200, 25)
    pdf.image(cloud_filename, x=15, y=30, w=180)

    # Page 2: example-sentence table for the most frequent words.
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, "Oraciones de ejemplo", ln=True, align='C')

    high_freq_words = sorted(word.upper() for word, _freq in word_freq.most_common(20))

    pdf.set_font('Arial', 'B', 12)
    pdf.set_fill_color(200, 200, 200)
    pdf.cell(90, 10, 'PALABRA', 1, fill=True)
    pdf.cell(0, 10, 'ORACIÓN DE EJEMPLO', 1, fill=True)
    pdf.ln()

    pdf.set_font('Arial', '', 12)
    pdf.set_line_width(0.1)
    for word in high_freq_words:
        example_sent = get_example_sentences(word.lower())
        # Guard clause replaces the original's redundant `else: continue`.
        if not example_sent:
            continue
        # Mark the matched word in both its lowercase and upper-cased forms.
        example_sentence = (
            example_sent[0]
            .replace(word.lower(), f'**{word}**')
            .replace(word, f'**{word}**')
        )
        pdf.cell(90, 10, word, 1)
        pdf.set_font('Arial', '', 10)
        pdf.multi_cell(0, 10, example_sentence, 1)
        pdf.set_font('Arial', 'I', 8)
        pdf.cell(90, 10, '', 0)
        pdf.cell(0, 10, 'Fuente: NLTK', 0)
        pdf.set_font('Arial', '', 12)
        pdf.ln()

# Main entry point for the Gradio interface
def create_pdf_from_text(text, title):
    """Build the PDF for *text*/*title* and return its filename so
    Gradio can serve the file back to the user.

    The title is sanitized before being used as the output filename so
    path separators or other unsafe characters cannot escape the working
    directory or produce an invalid path.
    """
    pdf = PDF()
    add_text_to_pdf(pdf, text, title)
    # Keep word chars, hyphens and spaces; everything else becomes '_'.
    safe_title = re.sub(r'[^\w\- ]', '_', title).strip() or 'documento'
    pdf_filename = f'{safe_title}.pdf'
    pdf.output(pdf_filename)
    return pdf_filename

# Gradio UI: text + title in, downloadable PDF out.
# NOTE: gr.inputs.* / gr.outputs.* were removed in Gradio 3.x; components
# are now exposed at the top level (gr.Textbox, gr.File).
iface = gr.Interface(
    fn=create_pdf_from_text,
    inputs=[
        gr.Textbox(lines=10, label="Texto en Español"),
        gr.Textbox(label="Título"),
    ],
    outputs=gr.File(label="Descargar PDF"),
    title="Generador de PDFs con Nubes de Palabras",
)

if __name__ == "__main__":
    iface.launch()