Spaces:
Runtime error
Runtime error
import logging
import re

from langdetect import detect
from transformers import pipeline

from utils.tag_utils import filter_tags
# Version number stamped into every dict returned by summarize() under "ver".
AiSummaryVersion = 2
# Minimum classifier score a label must reach to be kept as a tag.
MinTagScore = 0.7

# The pipelines below are created once, at import time, and shared by all
# calls into this module.
summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum")
en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")

# Three text classifiers; get_tags() concatenates their labels in this order.
tag_gen_pipe_1 = pipeline("text-classification", model="yiyanghkust/finbert-esg-9-categories")
tag_gen_pipe_2 = pipeline("text-classification", model="dima806/news-category-classifier-distilbert")
tag_gen_pipe_3 = pipeline("text-classification", model="elozano/bert-base-cased-news-category")
def summarize(id: str, text: str):
    """Build the AI summary record for one piece of text.

    Args:
        id: Identifier echoed back in the result and passed on to get_tags().
            (The name shadows the builtin but is kept for keyword callers.)
        text: Source text. May be None.

    Returns:
        ``{"ver": AiSummaryVersion}`` only, when *text* is None or shorter
        than 10 characters. Otherwise a dict with keys ``id``, ``ver``,
        ``summary`` and ``tags``, where ``tags`` is sorted and free of
        duplicates. ``summary`` is None when summarization failed.
    """
    if text is None or len(text) < 10:
        return {
            "ver": AiSummaryVersion,
        }

    # Texts of at most 100 characters are used verbatim as their own summary.
    summary = get_summarization(text) if len(text) > 100 else text

    # Tags are generated from the English rendering of the summary.
    translated = get_en_translation(summary)
    tags = sorted(set(filter_tags(get_tags(translated, id))))

    return {
        "id": id,
        "ver": AiSummaryVersion,
        "summary": summary,
        "tags": tags,
    }
def get_summarization(text: str):
    """Return the summary of *text*, or None if it cannot be produced.

    This is best-effort: any failure inside the summarization pipeline, or an
    unexpected result shape, is logged and reported as None rather than
    raised.
    """
    if text is None:
        return None
    try:
        result = summarization_pipeline(text)
        # The result is either a list of dicts or a single dict.
        return result[0]['summary_text'] if isinstance(result, list) else result['summary_text']
    except Exception:
        # `except Exception` (not bare) so Ctrl-C / SystemExit still propagate.
        logging.getLogger(__name__).exception("Summarization failed")
        return None
def get_en_translation(text: str):
    """Return *text* in English, or None if it is None or translation fails.

    Text that is_english() reports as English is returned unchanged without
    invoking the translation pipeline. Failures are logged, not raised.
    """
    if text is None:
        return None
    try:
        if is_english(text):
            return text
        result = en_translation_pipe(text)
        # The result is either a list of dicts or a single dict.
        return result[0]['translation_text'] if isinstance(result, list) else result['translation_text']
    except Exception:
        # `except Exception` (not bare) so Ctrl-C / SystemExit still propagate.
        logging.getLogger(__name__).exception("Translation to English failed")
        return None
def is_english(text):
    """Return True only when language detection reports 'en' for *text*.

    Empty or None input, and any detection failure, yield False.
    """
    if not text:
        # Nothing to detect; skip the detector instead of relying on it to raise.
        return False
    try:
        return detect(text) == 'en'
    except Exception:
        # Detection failure is treated as "not English" (best-effort).
        logging.getLogger(__name__).debug("Language detection failed", exc_info=True)
        return False
def get_tags(text: str, id: str):
    """Collect classifier labels for *text* that score at least MinTagScore.

    Args:
        text: Text to classify. None yields an empty list.
        id: Identifier of the item being tagged; used only for logging.

    Returns:
        Labels from tag_gen_pipe_1, tag_gen_pipe_2 and tag_gen_pipe_3, in that
        order. Duplicates are not removed here. If any classifier fails, the
        failure is logged and an empty list is returned.
    """
    if text is None:
        return []
    logger = logging.getLogger(__name__)
    try:
        tags = []
        for classify in (tag_gen_pipe_1, tag_gen_pipe_2, tag_gen_pipe_3):
            tags.extend(
                tag['label'] for tag in classify(text) if tag['score'] >= MinTagScore
            )
        # Replaces the former debug print() calls; silent unless DEBUG is on.
        logger.debug("Tags for %s: %s (text=%r)", id, tags, text)
        return tags
    except Exception:
        # `except Exception` (not bare) so Ctrl-C / SystemExit still propagate.
        logger.exception("Tag generation failed for %s", id)
        return []