quyip committed on
Commit
e2ec8e0
·
1 Parent(s): 51a46da
Files changed (1) hide show
  1. utils/summary_utils.py +12 -13
utils/summary_utils.py CHANGED
@@ -5,11 +5,13 @@ from transformers import pipeline
5
 
6
  from utils.tag_utils import filter_tags
7
 
8
- AiSummaryVersion = 1
9
  summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum")
10
  en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
11
  classification_pipe = pipeline("text-classification", model="Yueh-Huan/news-category-classification-distilbert")
12
- tag_gen_pipe = pipeline("text2text-generation", model="fabiochiu/t5-base-tag-generation")
 
 
13
 
14
 
15
  def summarize(id: str, text: str):
@@ -37,8 +39,7 @@ def get_summarization(text: str):
37
  try:
38
  result = summarization_pipeline(text)
39
  return result[0]['summary_text'] if isinstance(result, list) else result['summary_text']
40
- except Exception as e:
41
- print(e)
42
  return None
43
 
44
 
@@ -50,8 +51,7 @@ def get_en_translation(text: str):
50
  return text
51
  result = en_translation_pipe(text)
52
  return result[0]['translation_text'] if isinstance(result, list) else result['translation_text']
53
- except Exception as e:
54
- print(e)
55
  return None
56
 
57
 
@@ -59,8 +59,7 @@ def is_english(text):
59
  try:
60
  lang = detect(text)
61
  return lang == 'en'
62
- except Exception as e:
63
- print(e)
64
  return False
65
 
66
 
@@ -68,14 +67,15 @@ def get_tags(text: str):
68
  if text is None:
69
  return []
70
  try:
71
- result = tag_gen_pipe(text)
 
 
72
  tag_str = result[0]['generated_text'] if isinstance(result, list) else result['generated_text']
73
  tags = re.split(r'[&,]', tag_str)
74
  tags = [tag.strip() for tag in tags]
75
  tags = [tag for tag in tags if len(tag) > 2 and len(tag.split(' ')) == 1]
76
  return tags
77
- except Exception as e:
78
- print(e)
79
  return []
80
 
81
 
@@ -88,6 +88,5 @@ def get_classification(text: str):
88
  return [tag['label'].strip() for tag in result if tag['score'] > 0.75]
89
  else:
90
  return [result['label'].strip()] if result['score'] > 0.75 else []
91
- except Exception as e:
92
- print(e)
93
  return []
 
5
 
6
  from utils.tag_utils import filter_tags
7
 
8
+ AiSummaryVersion = 2
9
  summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum")
10
  en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
11
  classification_pipe = pipeline("text-classification", model="Yueh-Huan/news-category-classification-distilbert")
12
+ tag_gen_pipe_1 = pipeline("text-classification", model="yiyanghkust/finbert-esg-9-categories")
13
+ tag_gen_pipe_2 = pipeline("text-classification", model="dima806/news-category-classifier-distilbert")
14
+ tag_gen_pipe_3 = pipeline("text-classification", model="elozano/bert-base-cased-news-category")
15
 
16
 
17
  def summarize(id: str, text: str):
 
39
  try:
40
  result = summarization_pipeline(text)
41
  return result[0]['summary_text'] if isinstance(result, list) else result['summary_text']
42
+ except:
 
43
  return None
44
 
45
 
 
51
  return text
52
  result = en_translation_pipe(text)
53
  return result[0]['translation_text'] if isinstance(result, list) else result['translation_text']
54
+ except:
 
55
  return None
56
 
57
 
 
59
  try:
60
  lang = detect(text)
61
  return lang == 'en'
62
+ except:
 
63
  return False
64
 
65
 
 
67
  if text is None:
68
  return []
69
  try:
70
+ result = tag_gen_pipe_1(text)
71
+ print('XXXXXXXXXXXXXXXXXXXXX')
72
+ print(result)
73
  tag_str = result[0]['generated_text'] if isinstance(result, list) else result['generated_text']
74
  tags = re.split(r'[&,]', tag_str)
75
  tags = [tag.strip() for tag in tags]
76
  tags = [tag for tag in tags if len(tag) > 2 and len(tag.split(' ')) == 1]
77
  return tags
78
+ except:
 
79
  return []
80
 
81
 
 
88
  return [tag['label'].strip() for tag in result if tag['score'] > 0.75]
89
  else:
90
  return [result['label'].strip()] if result['score'] > 0.75 else []
91
+ except:
 
92
  return []