quyip committed on
Commit
7019e7f
·
1 Parent(s): 60a3519
Files changed (1) hide show
  1. utils/summary_utils.py +4 -12
utils/summary_utils.py CHANGED
@@ -1,17 +1,14 @@
1
- import re
2
-
3
  from langdetect import detect
4
  from transformers import pipeline
5
 
6
  from utils.tag_utils import filter_tags
7
 
8
- AiSummaryVersion = 2
9
  MinTagScore = 0.7
10
  summarization_pipeline = pipeline("summarization", model="Falconsai/text_summarization")
11
  en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
12
- tag_gen_pipe_1 = pipeline("text-classification", model="yiyanghkust/finbert-esg-9-categories")
13
- tag_gen_pipe_2 = pipeline("text-classification", model="dima806/news-category-classifier-distilbert")
14
- tag_gen_pipe_3 = pipeline("text-classification", model="elozano/bert-base-cased-news-category")
15
 
16
 
17
  def summarize(id: str, text: str):
@@ -19,14 +16,10 @@ def summarize(id: str, text: str):
19
  return {
20
  "ver": AiSummaryVersion
21
  }
22
- print('XXXXXXXXXXXXXXXXXXXXX text')
23
  summary = get_summarization(text) if len(text) > 3000 else text
24
- print('XXXXXXXXXXXXXXXXXXXXX summary')
25
- print(summary)
26
  translated = get_en_translation(summary)
27
  tags = get_tags(translated, id)
28
  tags = filter_tags(tags)
29
- print(tags)
30
  tags = sorted(list(set(tags)))
31
 
32
  value = {
@@ -73,7 +66,6 @@ def get_tags(text: str, id: str):
73
  try:
74
  tags1 = [tag['label'] for tag in tag_gen_pipe_1(text) if tag['score'] >= MinTagScore]
75
  tags2 = [tag['label'] for tag in tag_gen_pipe_2(text) if tag['score'] >= MinTagScore]
76
- tags3 = [tag['label'] for tag in tag_gen_pipe_3(text) if tag['score'] >= MinTagScore]
77
- return tags1 + tags2 + tags3
78
  except:
79
  return []
 
 
 
1
  from langdetect import detect
2
  from transformers import pipeline
3
 
4
  from utils.tag_utils import filter_tags
5
 
6
+ AiSummaryVersion = 3
7
  MinTagScore = 0.7
8
  summarization_pipeline = pipeline("summarization", model="Falconsai/text_summarization")
9
  en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
10
+ tag_gen_pipe_1 = pipeline("text-classification", model="dima806/news-category-classifier-distilbert")
11
+ tag_gen_pipe_2 = pipeline("text-classification", model="elozano/bert-base-cased-news-category")
 
12
 
13
 
14
  def summarize(id: str, text: str):
 
16
  return {
17
  "ver": AiSummaryVersion
18
  }
 
19
  summary = get_summarization(text) if len(text) > 3000 else text
 
 
20
  translated = get_en_translation(summary)
21
  tags = get_tags(translated, id)
22
  tags = filter_tags(tags)
 
23
  tags = sorted(list(set(tags)))
24
 
25
  value = {
 
66
  try:
67
  tags1 = [tag['label'] for tag in tag_gen_pipe_1(text) if tag['score'] >= MinTagScore]
68
  tags2 = [tag['label'] for tag in tag_gen_pipe_2(text) if tag['score'] >= MinTagScore]
69
+ return tags1 + tags2
 
70
  except:
71
  return []