# summaryapi/utils/summary_utils.py
# Repository page metadata: author quyip, commit "fix" (3784e1c), raw / history / blame, 2.92 kB
import logging
import re

from langdetect import detect
from transformers import pipeline

from utils.tag_utils import filter_tags
# Version number reported under the "ver" key of every summarize() result.
AiSummaryVersion = 2
# Minimum classifier score a label needs to be kept by get_tags().
MinTagScore = 0.7
# NOTE: every pipeline(...) call below executes at import time, so importing
# this module instantiates all six Hugging Face pipelines.
# Used by get_summarization().
summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum")
# Used by get_en_translation() for text that is_english() does not recognise as English.
en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
# Used by get_classification().
classification_pipe = pipeline("text-classification", model="Yueh-Huan/news-category-classification-distilbert")
# The three text-classification pipelines below are queried by get_tags().
tag_gen_pipe_1 = pipeline("text-classification", model="yiyanghkust/finbert-esg-9-categories")
tag_gen_pipe_2 = pipeline("text-classification", model="dima806/news-category-classifier-distilbert")
tag_gen_pipe_3 = pipeline("text-classification", model="elozano/bert-base-cased-news-category")
def summarize(id: str, text: str):
    """Build the AI summary record for one piece of text.

    Args:
        id: Identifier copied verbatim into the result.
        text: Text to summarise and tag.

    Returns:
        ``{"ver": AiSummaryVersion}`` only, when *text* is ``None`` or shorter
        than 10 characters. Otherwise a dict with the keys ``"id"``,
        ``"ver"``, ``"summary"`` and ``"tags"``, where ``"tags"`` is a sorted
        list without duplicates.
    """
    too_short = text is None or len(text) < 10
    if too_short:
        return {"ver": AiSummaryVersion}

    # Texts of at most 100 characters serve as their own summary.
    if len(text) > 100:
        summary = get_summarization(text)
    else:
        summary = text

    # Tagging runs on the English rendition of the summary.
    english = get_en_translation(summary)
    candidate_tags = get_classification(english) + get_tags(english)
    unique_tags = set(filter_tags(candidate_tags))

    return {
        "id": id,
        "ver": AiSummaryVersion,
        "summary": summary,
        "tags": sorted(unique_tags),
    }
def get_summarization(text: str):
    """Summarise *text* with the module-level ``summarization_pipeline``.

    Args:
        text: Text to condense. ``None`` is tolerated.

    Returns:
        The ``summary_text`` value produced by the pipeline, or ``None`` when
        *text* is ``None`` or the pipeline call fails.
    """
    if text is None:
        return None
    try:
        result = summarization_pipeline(text)
        # The result is handled both as a list of dicts and as a single dict.
        first = result[0] if isinstance(result, list) else result
        return first['summary_text']
    except Exception:
        # Best-effort: callers treat None as "no summary". Catch Exception
        # rather than everything so KeyboardInterrupt/SystemExit still
        # propagate, and record the failure instead of hiding it.
        logging.getLogger(__name__).exception("Summarization failed")
        return None
def get_en_translation(text: str):
    """Return *text* in English, translating it when necessary.

    Args:
        text: Text to translate. ``None`` is tolerated.

    Returns:
        *text* unchanged when ``is_english`` reports it as English, the
        ``translation_text`` produced by ``en_translation_pipe`` otherwise,
        or ``None`` when *text* is ``None`` or translation fails.
    """
    if text is None:
        return None
    try:
        if is_english(text):
            return text
        result = en_translation_pipe(text)
        # The result is handled both as a list of dicts and as a single dict.
        first = result[0] if isinstance(result, list) else result
        return first['translation_text']
    except Exception:
        # Best-effort: callers treat None as "no translation". Catch
        # Exception rather than everything so KeyboardInterrupt/SystemExit
        # still propagate, and record the failure instead of hiding it.
        logging.getLogger(__name__).exception("Translation to English failed")
        return None
def is_english(text: str) -> bool:
    """Report whether ``langdetect`` classifies *text* as English.

    Args:
        text: Text to inspect.

    Returns:
        ``True`` when the detected language code is ``'en'``; ``False`` for
        any other language, for empty or ``None`` input, and whenever
        detection raises.
    """
    # Empty input carries nothing to detect; answer without calling the detector.
    if not text:
        return False
    try:
        # NOTE(review): the langdetect README describes detection as
        # non-deterministic unless DetectorFactory.seed is set — confirm
        # whether that matters for short summaries.
        return detect(text) == 'en'
    except Exception:
        # Detection failure is treated as "not English". Exception (not a
        # bare except) keeps KeyboardInterrupt/SystemExit propagating.
        return False
def get_tags(text: str):
    """Run the three tag classifiers on *text* and print their output.

    Each of ``tag_gen_pipe_1`` .. ``tag_gen_pipe_3`` is run once. The raw
    outputs are printed first, followed by the labels whose score is at
    least ``MinTagScore``.

    Args:
        text: Text to classify. ``None`` is tolerated.

    Returns:
        Always an empty list (see the review note in the body).
    """
    if text is None:
        return []
    try:
        # Run every model exactly once and reuse its output for both the raw
        # dump and the score filter, instead of running inference twice.
        raw_outputs = []
        for pipe in (tag_gen_pipe_1, tag_gen_pipe_2, tag_gen_pipe_3):
            raw = pipe(text)
            print(raw)
            raw_outputs.append(raw)

        selected = [
            [tag['label'] for tag in raw if tag['score'] >= MinTagScore]
            for raw in raw_outputs
        ]
        # The prints are kept on purpose: they are this function's only
        # observable output.
        for labels in selected:
            print(labels)

        # NOTE(review): the selected labels are discarded and [] is returned
        # unconditionally. This looks like an experiment left in place —
        # confirm whether the labels should be returned to the caller.
        return []
    except Exception:
        # Best-effort: tagging must not break the caller. Exception (not a
        # bare except) keeps KeyboardInterrupt/SystemExit propagating.
        logging.getLogger(__name__).exception("Tag generation failed")
        return []
def get_classification(text: str, min_score: float = 0.75):
    """Return the category labels ``classification_pipe`` assigns to *text*.

    Args:
        text: Text to classify. ``None`` is tolerated.
        min_score: A label is kept only when its score is strictly greater
            than this value. Defaults to 0.75.

    Returns:
        A list of whitespace-stripped labels; empty when *text* is ``None``,
        when no label scores above *min_score*, or when classification
        fails.
    """
    if text is None:
        return []
    try:
        result = classification_pipe(text)
        # Treat a single-dict result as a one-element list so both result
        # shapes go through the same filter.
        candidates = result if isinstance(result, list) else [result]
        return [
            candidate['label'].strip()
            for candidate in candidates
            if candidate['score'] > min_score
        ]
    except Exception:
        # Best-effort: callers treat [] as "no category". Exception (not a
        # bare except) keeps KeyboardInterrupt/SystemExit propagating.
        logging.getLogger(__name__).exception("Classification failed")
        return []