"""Summarize text and derive tags using Hugging Face pipelines.

All pipelines below are constructed at import time, so importing this
module loads every model.
"""
import logging
import re
from typing import Any, Dict, List, Optional

from langdetect import detect
from transformers import pipeline

from utils.tag_utils import filter_tags

logger = logging.getLogger(__name__)

# Emitted as "ver" in every result returned by summarize().
AiSummaryVersion = 2
# A classifier label is kept only when its score is at least this value.
MinTagScore = 0.7

summarization_pipeline = pipeline(
    "summarization", model="csebuetnlp/mT5_multilingual_XLSum"
)
en_translation_pipe = pipeline(
    "translation", model="Helsinki-NLP/opus-mt-mul-en"
)
tag_gen_pipe_1 = pipeline(
    "text-classification", model="yiyanghkust/finbert-esg-9-categories"
)
tag_gen_pipe_2 = pipeline(
    "text-classification", model="dima806/news-category-classifier-distilbert"
)
tag_gen_pipe_3 = pipeline(
    "text-classification", model="elozano/bert-base-cased-news-category"
)


def summarize(id: str, text: Optional[str]) -> Dict[str, Any]:
    """Build the summary/tags record for one piece of text.

    Args:
        id: Identifier echoed back in the result.
        text: Source text; may be ``None``.

    Returns:
        ``{"ver": AiSummaryVersion}`` only, when *text* is ``None`` or
        shorter than 10 characters. Otherwise a dict with ``"id"``,
        ``"ver"``, ``"summary"`` and ``"tags"``. ``"summary"`` is the text
        itself when it is at most 100 characters long, and may be ``None``
        when summarization fails. ``"tags"`` is a sorted, de-duplicated
        list of whatever ``filter_tags`` keeps.
    """
    if text is None or len(text) < 10:
        return {"ver": AiSummaryVersion}

    # Short texts are used verbatim instead of being run through the model.
    summary = get_summarization(text) if len(text) > 100 else text

    # Tags are computed on the English rendering of the full text.
    translated = get_en_translation(text)
    tags = filter_tags(get_tags(translated, id))

    return {
        "id": id,
        "ver": AiSummaryVersion,
        "summary": summary,
        "tags": sorted(set(tags)),
    }


def get_summarization(text: str) -> Optional[str]:
    """Return the summarization pipeline's summary of *text*.

    Best-effort: any failure is logged and ``None`` is returned.
    """
    try:
        result = summarization_pipeline(text)
        # Accept either a list of result dicts or a single dict.
        item = result[0] if isinstance(result, list) else result
        return item['summary_text']
    except Exception:
        logger.exception("summarization failed")
        return None


def get_en_translation(text: Optional[str]) -> Optional[str]:
    """Return *text* in English, translating only when needed.

    Text detected as English is returned unchanged. Best-effort: ``None``
    input yields ``None``; any failure is logged and yields ``None``.
    """
    if text is None:
        return None
    try:
        if is_english(text):
            return text
        result = en_translation_pipe(text)
        # Accept either a list of result dicts or a single dict.
        item = result[0] if isinstance(result, list) else result
        return item['translation_text']
    except Exception:
        logger.exception("translation to English failed")
        return None


def is_english(text) -> bool:
    """Return ``True`` when ``langdetect`` reports *text* as ``'en'``.

    Any detection error is treated as "not English".
    """
    try:
        return detect(text) == 'en'
    except Exception:
        return False


def get_tags(text: Optional[str], id: str) -> List[str]:
    """Collect labels from the three tag classifiers for *text*.

    Args:
        text: Text to classify; ``None`` yields an empty list.
        id: Identifier used only in debug logging.

    Returns:
        Labels scoring at least ``MinTagScore``, concatenated in pipeline
        order (duplicates are not removed here). If any classifier fails,
        the failure is logged and an empty list is returned.
    """
    if text is None:
        return []
    try:
        per_pipeline = [
            [tag['label'] for tag in pipe(text) if tag['score'] >= MinTagScore]
            for pipe in (tag_gen_pipe_1, tag_gen_pipe_2, tag_gen_pipe_3)
        ]
    except Exception:
        logger.exception("tag generation failed for id=%s", id)
        return []

    logger.debug("tags for id=%s: %s", id, per_pipeline)
    return [label for labels in per_pipeline for label in labels]