dexttttrees committed on
Commit
03b1bed
verified
1 Parent(s): 8487a2e

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +140 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import nltk
3
+ from nltk.corpus import cess_esp, conll2002
4
+ from nltk.tokenize import word_tokenize
5
+ import stylecloud
6
+ import matplotlib.pyplot as plt
7
+ from fpdf import FPDF
8
+ import re
9
+ from collections import Counter
10
+ import spacy
11
+ import random
12
+ import csv
13
+
14
# Download the NLTK resources needed below: tokenizers, the Spanish
# stopword list, and the two Spanish corpora used for example sentences.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('cess_esp')
nltk.download('conll2002')

# Load the medium Spanish spaCy model (used for lemmatization and POS tags).
nlp = spacy.load('es_core_news_md')

# Extra stopwords applied on top of NLTK's Spanish stopword list.
additional_stopwords = [
    # Add more stopwords here if desired
]
26
+
27
# Preprocessing: normalize the text and keep only lemmas of content words.
def preprocess_text(text):
    """Return lemmas of Spanish content words (verbs/adjectives/nouns)
    in *text*, excluding stopwords."""
    normalized = re.sub(r'\W', ' ', text.lower())
    stop_words = set(nltk.corpus.stopwords.words('spanish'))
    stop_words.update(additional_stopwords)
    kept_pos = ('VERB', 'ADJ', 'NOUN')
    lemmas = []
    for token in nlp(normalized):
        if token.text in stop_words:
            continue
        if token.pos_ in kept_pos:
            lemmas.append(token.lemma_)
    return lemmas
35
+
36
# Collect example sentences containing a word from multiple NLTK corpora.
def get_example_sentences(word, num_sentences=1):
    """Return up to *num_sentences* sentences from cess_esp/conll2002
    that contain *word* as a token.

    Single-character words never match; that guard was loop-invariant in
    the original but evaluated per sentence (scanning both corpora for
    nothing), so it is hoisted to an early return here.
    """
    sentences = []
    if len(word) <= 1:
        return sentences
    for corpus in (cess_esp, conll2002):
        for sent in corpus.sents():
            if word in sent:
                sentences.append(' '.join(sent))
                # Early return replaces the original's nested double break.
                if len(sentences) >= num_sentences:
                    return sentences
    return sentences
48
+
49
# Build a word cloud shaped by a randomly picked icon.
def generate_random_style_cloud(words, filename):
    """Render *words* as a stylecloud PNG saved to *filename*, then display it."""
    icon_choices = (
        'fas fa-cloud',
        'fas fa-star',
        'fas fa-heart',
        'fas fa-tree',
        'fas fa-sun',
        'fas fa-moon',
    )
    stylecloud.gen_stylecloud(
        text=' '.join(words),
        icon_name=random.choice(icon_choices),
        output_name=filename,
    )
    # Read the file back and show it (typically a no-op on headless servers).
    plt.imshow(plt.imread(filename))
    plt.axis('off')
    plt.show()
59
+
60
# PDF document class: decorative side bands plus a page-number footer.
class PDF(FPDF):
    """FPDF subclass drawing light-blue bands on both page edges and a
    centered page number at the bottom."""

    def header(self):
        # Filled 10 mm band on each vertical edge (A4 height is 297 mm).
        self.set_fill_color(200, 220, 255)
        for x_pos in (0, 200):
            self.rect(x_pos, 0, 10, 297, 'F')

    def footer(self):
        # 15 mm above the bottom edge, small italic, centered page number.
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')
71
+
72
def add_text_to_pdf(pdf, text, title):
    """Add a word-cloud page and an example-sentence table for *text* to *pdf*.

    Side effects: writes ``word_freq_<title>.csv`` and
    ``wordcloud_<title>.png`` into the working directory.
    """
    filtered_words = preprocess_text(text)
    word_freq = Counter(filtered_words)
    word_freq_file = f"word_freq_{title}.csv"

    # newline='' per the csv module docs (prevents blank rows on Windows);
    # explicit UTF-8 so accented lemmas survive the round trip.
    with open(word_freq_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['word', 'frequency'])
        for word, freq in word_freq.items():
            writer.writerow([word, freq])

    cloud_filename = f'wordcloud_{title}.png'
    generate_random_style_cloud(filtered_words, cloud_filename)

    # Page 1: title, rule, and the word-cloud image.
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, title, ln=True, align='C')
    pdf.set_draw_color(0, 0, 0)
    pdf.set_line_width(0.5)
    pdf.line(10, 25, 200, 25)
    pdf.image(cloud_filename, x=15, y=30, w=180)

    # Page 2: example-sentence table for the 20 most frequent words.
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, "Oraciones de ejemplo", ln=True, align='C')

    high_freq_words = sorted(word.upper() for word, freq in word_freq.most_common(20))

    pdf.set_font('Arial', 'B', 12)
    pdf.set_fill_color(200, 200, 200)
    pdf.cell(90, 10, 'PALABRA', 1, fill=True)
    # Bug fix: this header string previously contained a mis-encoded 'Ó'
    # (mojibake) that classic FPDF's latin-1 encoder cannot emit.
    pdf.cell(0, 10, 'ORACIÓN DE EJEMPLO', 1, fill=True)
    pdf.ln()

    pdf.set_font('Arial', '', 12)
    pdf.set_line_width(0.1)
    for word in high_freq_words:
        example_sent = get_example_sentences(word.lower())
        if not example_sent:
            continue
        # Bug fix: a single case-insensitive substitution. The original
        # chained .replace() calls re-matched the word inside the marker it
        # had just inserted, producing '****WORD****'.
        example_sentence = re.sub(
            re.escape(word), f'**{word}**', example_sent[0], flags=re.IGNORECASE
        )
        pdf.cell(90, 10, word, 1)
        pdf.set_font('Arial', '', 10)
        pdf.multi_cell(0, 10, example_sentence, 1)
        pdf.set_font('Arial', 'I', 8)
        pdf.cell(90, 10, '', 0)
        pdf.cell(0, 10, 'Fuente: NLTK', 0)
        pdf.set_font('Arial', '', 12)
        pdf.ln()
122
+
123
# Main entry point wired to the Gradio interface.
def create_pdf_from_text(text, title):
    """Build the word-cloud/example PDF for *text* and return its filename."""
    document = PDF()
    add_text_to_pdf(document, text, title)
    output_path = f'{title}.pdf'
    document.output(output_path)
    return output_path
130
+
131
# Gradio interface.
# Bug fix: the gr.inputs / gr.outputs namespaces were deprecated in Gradio 2
# and removed in modern releases — with the unpinned `gradio` requirement a
# fresh install raised AttributeError. Use the top-level components instead.
# Labels also had mojibake ('Espa帽ol', 'T铆tulo') which is repaired here.
iface = gr.Interface(
    fn=create_pdf_from_text,
    inputs=[
        gr.Textbox(lines=10, label="Texto en Español"),
        gr.Textbox(label="Título"),
    ],
    outputs=gr.File(label="Descargar PDF"),
    title="Generador de PDFs con Nubes de Palabras",
)
138
+
139
# Launch the web UI only when executed as a script (not on import).
if __name__ == "__main__":
    iface.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ nltk
3
+ stylecloud
4
+ matplotlib
5
+ fpdf
6
+ spacy
7
+ es_core_news_md @ https://github.com/explosion/spacy-models/releases/download/es_core_news_md-3.2.0/es_core_news_md-3.2.0.tar.gz