# Nubedepalabras / app.py
# (Hugging Face Spaces upload metadata: dexttttrees — "Upload 2 files" — commit 03b1bed, verified)
import gradio as gr
import nltk
from nltk.corpus import cess_esp, conll2002
from nltk.tokenize import word_tokenize
import stylecloud
import matplotlib.pyplot as plt
from fpdf import FPDF
import re
from collections import Counter
import spacy
import random
import csv
# Download the nltk resources the app needs: the 'punkt' tokenizer,
# the Spanish stopword list, and the two Spanish corpora used to look
# up example sentences.
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('cess_esp')
nltk.download('conll2002')
# Load the medium Spanish spaCy model (provides lemmas and POS tags).
nlp = spacy.load('es_core_news_md')
# Extra stopwords merged with nltk's Spanish list in preprocess_text.
additional_stopwords = [
    # Add more stopwords here if desired
]
def preprocess_text(text):
    """Normalize *text* and return the lemmas of its content words.

    The text is lower-cased, every non-word character is replaced by a
    space, and the result is run through spaCy. Only verbs, adjectives
    and nouns survive; anything in the Spanish stopword list (plus the
    module-level ``additional_stopwords``) is dropped.
    """
    cleaned = re.sub(r'\W', ' ', text.lower())
    doc = nlp(cleaned)

    stop_words = set(nltk.corpus.stopwords.words('spanish'))
    stop_words.update(additional_stopwords)
    content_pos = {'VERB', 'ADJ', 'NOUN'}

    return [
        token.lemma_
        for token in doc
        if token.text not in stop_words and token.pos_ in content_pos
    ]
def get_example_sentences(word, num_sentences=1):
    """Collect up to *num_sentences* corpus sentences containing *word*.

    Searches the cess_esp and conll2002 Spanish corpora in order and
    returns each matching sentence joined back into a single string.
    Returns fewer sentences (possibly none) when the corpora contain
    fewer matches.
    """
    sentences = []
    # The original checked `len(word) > 1` inside the inner loop, so a
    # 0/1-character word still scanned BOTH corpora end-to-end only to
    # return []. Hoist the invariant check into an early return.
    if len(word) <= 1:
        return sentences
    for corpus in (cess_esp, conll2002):
        for sent in corpus.sents():
            if word in sent:
                sentences.append(' '.join(sent))
                if len(sentences) >= num_sentences:
                    return sentences
    return sentences
def generate_random_style_cloud(words, filename):
    """Render *words* as a word cloud shaped by a random Font Awesome icon.

    The cloud image is written to *filename* and then previewed with
    matplotlib.
    """
    text = ' '.join(words)
    icons = ['fas fa-cloud', 'fas fa-star', 'fas fa-heart', 'fas fa-tree', 'fas fa-sun', 'fas fa-moon']
    random_icon = random.choice(icons)
    stylecloud.gen_stylecloud(text=text, icon_name=random_icon, output_name=filename)
    # Preview the generated image. Close the figure afterwards: without
    # plt.close() each request leaks an open matplotlib figure in this
    # long-running Gradio process.
    img = plt.imread(filename)
    plt.imshow(img)
    plt.axis('off')
    plt.show()
    plt.close()
class PDF(FPDF):
    """PDF layout with light-blue bands along both page edges and a
    centered page-number footer."""

    def header(self):
        # Draw a 10 mm filled band down the left and right edges of an
        # A4 page (297 mm tall).
        self.set_fill_color(200, 220, 255)
        for band_x in (0, 200):
            self.rect(band_x, 0, 10, 297, 'F')

    def footer(self):
        # Centered italic page number, 15 mm up from the bottom edge.
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')
def _save_word_frequencies(word_freq, title):
    """Write the word/frequency table to word_freq_{title}.csv and return its path."""
    word_freq_file = f"word_freq_{title}.csv"
    # newline='' is required by the csv module (otherwise blank rows
    # appear on Windows); utf-8 keeps accented Spanish words intact.
    with open(word_freq_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['word', 'frequency'])
        for word, freq in word_freq.items():
            writer.writerow([word, freq])
    return word_freq_file


def add_text_to_pdf(pdf, text, title):
    """Append two pages to *pdf* for the given Spanish *text*.

    Page 1: *title* plus a word-cloud image of the text's content words.
    Page 2: a table with example corpus sentences for the 20 most
    frequent words.

    Side effects: writes word_freq_{title}.csv and wordcloud_{title}.png
    in the working directory.
    """
    filtered_words = preprocess_text(text)
    word_freq = Counter(filtered_words)
    _save_word_frequencies(word_freq, title)

    cloud_filename = f'wordcloud_{title}.png'
    generate_random_style_cloud(filtered_words, cloud_filename)

    # --- Page 1: title and word cloud ---
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, title, ln=True, align='C')
    pdf.set_draw_color(0, 0, 0)
    pdf.set_line_width(0.5)
    pdf.line(10, 25, 200, 25)
    pdf.image(cloud_filename, x=15, y=30, w=180)

    # --- Page 2: example sentences for the most frequent words ---
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(0, 10, "Oraciones de ejemplo", ln=True, align='C')

    high_freq_words = sorted([word.upper() for word, freq in word_freq.most_common(20)])

    # Table header row.
    pdf.set_font('Arial', 'B', 12)
    pdf.set_fill_color(200, 200, 200)
    pdf.cell(90, 10, 'PALABRA', 1, fill=True)
    pdf.cell(0, 10, 'ORACIÓN DE EJEMPLO', 1, fill=True)
    pdf.ln()
    pdf.set_font('Arial', '', 12)
    pdf.set_line_width(0.1)
    for word in high_freq_words:
        example_sent = get_example_sentences(word.lower())
        if not example_sent:
            # No example found in the corpora: skip the row entirely.
            continue
        # Mark the target word (lower- and upper-case occurrences) with
        # ** delimiters in the example sentence.
        example_sentence = example_sent[0].replace(word.lower(), f'**{word}**').replace(word, f'**{word}**')
        pdf.cell(90, 10, word, 1)
        pdf.set_font('Arial', '', 10)
        pdf.multi_cell(0, 10, example_sentence, 1)
        pdf.set_font('Arial', 'I', 8)
        pdf.cell(90, 10, '', 0)
        pdf.cell(0, 10, 'Fuente: NLTK', 0)
        pdf.set_font('Arial', '', 12)
        pdf.ln()
def create_pdf_from_text(text, title):
    """Build the word-cloud PDF for *text* and return its filename.

    Entry point for the Gradio interface. *title* comes straight from
    untrusted web input, so it is sanitized before being used as part
    of the output filename (path separators etc. are replaced).
    """
    pdf = PDF()
    add_text_to_pdf(pdf, text, title)
    # Keep only word characters, hyphens and spaces in the filename;
    # fall back to a default when nothing safe remains.
    safe_title = re.sub(r'[^\w\- ]', '_', title).strip() or 'documento'
    pdf_filename = f'{safe_title}.pdf'
    pdf.output(pdf_filename)
    return pdf_filename
# Gradio interface: Spanish text + a title in, a downloadable PDF out.
# NOTE: the gr.inputs / gr.outputs namespaces were removed in Gradio 3.x;
# the top-level components (gr.Textbox, gr.File) are the supported API.
iface = gr.Interface(
    fn=create_pdf_from_text,
    inputs=[
        gr.Textbox(lines=10, label="Texto en Español"),
        gr.Textbox(label="Título"),
    ],
    outputs=gr.File(label="Descargar PDF"),
    title="Generador de PDFs con Nubes de Palabras",
)

if __name__ == "__main__":
    iface.launch()