# NOTE(review): the lines "Spaces:" / "Running" below the title looked like
# UI status text (presumably a Hugging Face Spaces header) captured by the
# export, not code — preserved here as a comment.
import fasttext
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
class FastTextSummarizer:
    """Extractive summarizer for Amharic text built on fastText word vectors.

    Each sentence is embedded as the mean vector of its in-vocabulary words;
    sentences are then scored by cosine similarity against the mean embedding
    of the whole document, and the top scorers are returned in their original
    document order.
    """

    def __init__(self, model_file):
        """Load a fastText model from *model_file* (path to a trained model)."""
        self.model = fasttext.load_model(model_file)

    def sentence_embedding(self, sentence):
        """Return the mean fastText vector of the in-vocabulary words of *sentence*.

        Falls back to a zero vector of the model's dimension when the sentence
        is empty or contains no in-vocabulary word.
        """
        # Build the vocabulary set once per call: membership tests against the
        # raw `self.model.words` list would be an O(V) scan per word.
        vocab = set(self.model.words)
        word_vectors = [
            self.model.get_word_vector(word)
            for word in sentence.split()
            if word in vocab
        ]
        if word_vectors:
            return np.mean(word_vectors, axis=0)
        return np.zeros(self.model.get_dimension())

    def summarize(self, text, num_sentences=3):
        """Return an extractive summary of *text* with at most *num_sentences* sentences.

        Sentences are split on the Amharic full stop '።'. Returns an empty
        string when *text* contains no sentences.
        """
        sentences = [s.strip() for s in text.split('።') if s.strip()]
        if not sentences:
            # Guard: with no sentences, np.mean over an empty list and the
            # similarity step below would fail / emit NaN warnings.
            return ''
        embeddings = np.asarray([self.sentence_embedding(s) for s in sentences])
        document_embedding = embeddings.mean(axis=0)
        # Cosine similarity of each sentence against the document, in plain
        # NumPy; zero-norm vectors get similarity 0 instead of NaN.
        dots = embeddings @ document_embedding
        norms = np.linalg.norm(embeddings, axis=1) * np.linalg.norm(document_embedding)
        similarities = np.divide(dots, norms, out=np.zeros_like(dots), where=norms > 0)
        # Keep the highest-scoring sentences, restored to document order.
        top_indices = similarities.argsort()[::-1][:num_sentences]
        return '። '.join(sentences[i] for i in sorted(top_indices)) + '።'