Spaces:
Runtime error
Runtime error
quyip
committed on
Commit
·
02f8d21
1
Parent(s):
5949767
fix
Browse files- utils/summary_utils.py +26 -14
utils/summary_utils.py
CHANGED
@@ -3,12 +3,11 @@ from transformers import pipeline
|
|
3 |
|
4 |
from utils.tag_utils import filter_tags
|
5 |
|
6 |
-
AiSummaryVersion =
|
7 |
-
|
8 |
-
summarization_pipeline = pipeline("summarization", model="Falconsai/text_summarization")
|
9 |
en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
|
10 |
-
|
11 |
-
|
12 |
|
13 |
|
14 |
def summarize(id: str, text: str):
|
@@ -16,10 +15,11 @@ def summarize(id: str, text: str):
|
|
16 |
return {
|
17 |
"ver": AiSummaryVersion
|
18 |
}
|
19 |
-
summary = get_summarization(text) if len(text) >
|
20 |
translated = get_en_translation(summary)
|
21 |
-
|
22 |
-
|
|
|
23 |
tags = sorted(list(set(tags)))
|
24 |
|
25 |
value = {
|
@@ -33,8 +33,7 @@ def summarize(id: str, text: str):
|
|
33 |
|
34 |
def get_summarization(text: str):
|
35 |
try:
|
36 |
-
|
37 |
-
result = summarization_pipeline(text, max_length=500, min_length=100, do_sample=False)
|
38 |
return result[0]['summary_text'] if isinstance(result, list) else result['summary_text']
|
39 |
except:
|
40 |
return None
|
@@ -60,12 +59,25 @@ def is_english(text):
|
|
60 |
return False
|
61 |
|
62 |
|
63 |
-
def get_tags(text: str
|
64 |
if text is None:
|
65 |
return []
|
66 |
try:
|
67 |
-
|
68 |
-
|
69 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
except:
|
71 |
return []
|
|
|
3 |
|
4 |
from utils.tag_utils import filter_tags
|
5 |
|
6 |
+
AiSummaryVersion = 4
|
7 |
+
summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum")
|
|
|
8 |
en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
|
9 |
+
classification_pipe = pipeline("text-classification", model="Yueh-Huan/news-category-classification-distilbert")
|
10 |
+
text_to_tags_pipe = pipeline('text2text-generation', model='models/text2tags')
|
11 |
|
12 |
|
13 |
def summarize(id: str, text: str):
|
|
|
15 |
return {
|
16 |
"ver": AiSummaryVersion
|
17 |
}
|
18 |
+
summary = get_summarization(text) if len(text) > 100 else text
|
19 |
translated = get_en_translation(summary)
|
20 |
+
tags1 = get_classification(translated)
|
21 |
+
tags2 = get_tags(translated)
|
22 |
+
tags = filter_tags(tags1 + tags2)
|
23 |
tags = sorted(list(set(tags)))
|
24 |
|
25 |
value = {
|
|
|
33 |
|
34 |
def get_summarization(text: str):
    """Summarize *text* using the module-level ``summarization_pipeline``.

    Args:
        text: The text to summarize. ``None`` is tolerated and yields ``None``.

    Returns:
        The ``summary_text`` string produced by the pipeline, or ``None`` when
        no text was given or the pipeline call failed.
    """
    if text is None:
        return None
    try:
        result = summarization_pipeline(text)
        # The pipeline result is handled both as a list of dicts and as a
        # single dict; take the first entry when it is a list.
        return result[0]['summary_text'] if isinstance(result, list) else result['summary_text']
    except Exception:
        # Best-effort: any model/runtime failure means "no summary", but
        # KeyboardInterrupt / SystemExit are deliberately left to propagate.
        return None
|
|
|
59 |
return False
|
60 |
|
61 |
|
62 |
+
def get_tags(text: str):
    """Generate tags for *text* using the module-level ``text_to_tags_pipe``.

    The pipeline's ``generated_text`` is treated as a comma-separated list of
    tags; each tag is stripped of surrounding whitespace and empty entries
    (from blank output, doubled commas or a trailing comma) are discarded.

    Args:
        text: The text to tag. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of non-empty tag strings, or ``[]`` when there is no input or
        the pipeline call failed.
    """
    if not text:
        return []
    try:
        result = text_to_tags_pipe(text)
        # The pipeline result is handled both as a list of dicts and as a
        # single dict; take the first entry when it is a list.
        tag_str = result[0]['generated_text'] if isinstance(result, list) else result['generated_text']
    except Exception:
        # Best-effort: a failing model call means "no tags", but
        # KeyboardInterrupt / SystemExit are deliberately left to propagate.
        return []
    stripped = (tag.strip() for tag in tag_str.split(','))
    return [tag for tag in stripped if tag]
|
71 |
+
|
72 |
+
|
73 |
+
def get_classification(text: str, min_score: float = 0.75):
    """Classify *text* using the module-level ``classification_pipe``.

    Args:
        text: The text to classify. ``None`` or an empty string yields ``[]``.
        min_score: A label is kept only when its ``score`` is strictly greater
            than this value. Defaults to 0.75.

    Returns:
        A list of stripped label strings whose score exceeds ``min_score``, or
        ``[]`` when there is no input or the pipeline call failed.
    """
    if not text:
        return []
    try:
        result = classification_pipe(text)
        # Normalise the single-dict result shape to a list so both shapes
        # share one filtering path.
        predictions = result if isinstance(result, list) else [result]
        return [
            prediction['label'].strip()
            for prediction in predictions
            if prediction['score'] > min_score
        ]
    except Exception:
        # Best-effort: a failing model call means "no labels", but
        # KeyboardInterrupt / SystemExit are deliberately left to propagate.
        return []
|