Spaces:

berito
/

am_text_summary

Sleeping

App Files Files Community

berito commited on Jan 27

Commit

02b2abd

1 Parent(s): 0d71b77

text summarization functionality added

Browse files

Files changed (1) hide show

fast_text_summarizer.py +32 -0

fast_text_summarizer.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import fasttext
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+class FastTextSummarizer:
+    def __init__(self, model_file):
+        self.model = fasttext.load_model(model_file)
+    def sentence_embedding(self, sentence):
+        words = sentence.split()
+        word_vectors = [self.model.get_word_vector(word) for word in words if word in self.model.words]
+        if word_vectors:
+            return np.mean(word_vectors, axis=0)
+        else:
+            return np.zeros(self.model.get_dimension())
+    def summarize(self, text, num_sentences=3):
+        # Split text into sentences (adjust for Amharic, e.g., using '።')
+        sentences = text.split('።')  # Amharic sentence delimiter
+        sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
+        # Compute embeddings for each sentence
+        sentence_embeddings = [self.sentence_embedding(sentence) for sentence in sentences]
+        # Compute document embedding as the mean of sentence embeddings
+        document_embedding = np.mean(sentence_embeddings, axis=0)
+        # Calculate similarity between each sentence and the document
+        similarities = cosine_similarity([document_embedding], sentence_embeddings).flatten()
+        # Rank sentences by similarity
+        ranked_indices = similarities.argsort()[::-1]
+        top_indices = ranked_indices[:num_sentences]
+        # Extract the top sentences
+        summary = '። '.join([sentences[i] for i in sorted(top_indices)]) + '።'
+        return summary