codeteach committed on
Commit 580e12f · verified · 1 Parent(s): 44d5900

Update app.py

Files changed (1):
  1. app.py +12 -3
app.py CHANGED
@@ -2,9 +2,11 @@ import gradio as gr
 from transformers import pipeline, AutoTokenizer
 from sentence_transformers import SentenceTransformer, util
 import nltk
-nltk.download('punkt')
 from nltk.tokenize import sent_tokenize
 
+# Download NLTK data
+nltk.download('punkt')
+
 # Translation models
 translation_models = {
     'Vietnamese': "Helsinki-NLP/opus-mt-en-vi",
@@ -27,8 +29,11 @@ def get_translator(language):
 
 # Helper function to generate bullet points
 def generate_bullet_points(text):
-    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
+    print("Original Text:", text)
     sentences = sent_tokenize(text)
+    print("Sentences:", sentences)
+
+    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
     embeddings = model.encode(sentences, convert_to_tensor=True)
     clusters = util.community_detection(embeddings, threshold=0.75)
 
@@ -38,7 +43,10 @@ def generate_bullet_points(text):
         main_sentence = cluster_sentences[0] if cluster_sentences else ""
         bullet_points.append(main_sentence.strip())
 
-    return "\n".join(f"- {point}" for point in bullet_points)
+    result = "\n".join(f"- {point}" for point in bullet_points)
+    print("Bullet Points:", result)
+
+    return result
 
 # Helper function to split text into chunks
 def split_text(text, max_tokens=1024):
@@ -113,3 +121,4 @@ iface.launch()
 
 
 
+
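
For context, a minimal standalone sketch of the sentence-clustering step this commit touches, built only from the calls visible in the diff (sent_tokenize, SentenceTransformer.encode, util.community_detection, and taking cluster_sentences[0] as the bullet). The sample text and the min_community_size=1 argument are assumptions added so a short input produces clusters; they are not part of app.py.

    # Sketch of the clustering step behind generate_bullet_points.
    # Sample text and min_community_size=1 are illustrative assumptions.
    import nltk
    from nltk.tokenize import sent_tokenize
    from sentence_transformers import SentenceTransformer, util

    nltk.download('punkt')

    text = (
        "Transformers power modern NLP. "
        "Modern NLP relies heavily on transformer models. "
        "Lunch is at noon."
    )
    sentences = sent_tokenize(text)  # split text into sentences
    model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
    embeddings = model.encode(sentences, convert_to_tensor=True)

    # Group sentences with similar embeddings; each cluster is a list of indices.
    clusters = util.community_detection(embeddings, threshold=0.75, min_community_size=1)

    # Use the first sentence of each cluster as a bullet point,
    # mirroring the cluster_sentences[0] selection visible in the diff.
    for cluster in clusters:
        print("-", sentences[cluster[0]].strip())

Note that community_detection defaults to min_community_size=10, so with the call as written in app.py short inputs can yield no clusters and therefore no bullet points; the added print() statements in this commit make that easier to observe in the logs.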