"""Build an AI summary record (summary text plus tags) for a piece of text."""

import logging
import re

from langdetect import detect
from transformers import pipeline

from utils.tag_utils import filter_tags

_logger = logging.getLogger(__name__)

# Version marker written into every record returned by summarize().
AiSummaryVersion = 2
# Minimum score for a label produced by the tag_gen_pipe_* models to be kept.
MinTagScore = 0.7
# Minimum (exclusive) score for a label produced by classification_pipe.
_MIN_CLASSIFICATION_SCORE = 0.75

summarization_pipeline = pipeline(
    "summarization", model="csebuetnlp/mT5_multilingual_XLSum"
)
en_translation_pipe = pipeline(
    "translation", model="Helsinki-NLP/opus-mt-mul-en"
)
classification_pipe = pipeline(
    "text-classification",
    model="Yueh-Huan/news-category-classification-distilbert",
)
tag_gen_pipe_1 = pipeline(
    "text-classification", model="yiyanghkust/finbert-esg-9-categories"
)
tag_gen_pipe_2 = pipeline(
    "text-classification", model="dima806/news-category-classifier-distilbert"
)
tag_gen_pipe_3 = pipeline(
    "text-classification", model="elozano/bert-base-cased-news-category"
)


def _first_result(result):
    """Return the first element when *result* is a list, else *result* itself."""
    return result[0] if isinstance(result, list) else result


def summarize(id: str, text: str):
    """Build the summary record for *text*.

    Args:
        id: Identifier copied verbatim into the returned record. The name
            shadows the builtin but is kept for keyword-argument callers.
        text: The text to summarize.

    Returns:
        ``{"ver": AiSummaryVersion}`` when *text* is ``None`` or shorter than
        10 characters. Otherwise a dict with the keys ``id``, ``ver``,
        ``summary`` and ``tags``. ``summary`` is *text* itself when it is at
        most 100 characters long, else the output of get_summarization()
        (which may be ``None`` on failure). ``tags`` is a sorted list of the
        distinct tags that survive filter_tags().
    """
    if text is None or len(text) < 10:
        return {"ver": AiSummaryVersion}

    summary = get_summarization(text) if len(text) > 100 else text
    # Tagging runs on the English rendering of the summary.
    translated = get_en_translation(summary)
    tags = filter_tags(get_classification(translated) + get_tags(translated))

    return {
        "id": id,
        "ver": AiSummaryVersion,
        "summary": summary,
        "tags": sorted(set(tags)),
    }


def get_summarization(text: str):
    """Return the summary of *text*, or ``None`` if summarization fails."""
    try:
        return _first_result(summarization_pipeline(text))["summary_text"]
    except Exception:
        # Best effort: the caller treats None as "no summary available".
        _logger.exception("Summarization failed")
        return None


def get_en_translation(text: str):
    """Return *text* in English.

    Text that is_english() reports as English is returned unchanged.
    Returns ``None`` when *text* is ``None`` or when translation fails.
    """
    if text is None:
        return None
    try:
        if is_english(text):
            return text
        return _first_result(en_translation_pipe(text))["translation_text"]
    except Exception:
        _logger.exception("Translation to English failed")
        return None


def is_english(text):
    """Return True when language detection reports ``'en'`` for *text*.

    Any detection failure is treated as "not English".
    """
    try:
        return detect(text) == "en"
    except Exception:
        _logger.debug("Language detection failed", exc_info=True)
        return False


def get_tags(text: str):
    """Run the tag-generation models on *text* and log what they produce.

    NOTE(review): this function always returns an empty list. The labels
    scoring at least MinTagScore are computed and logged but never returned.
    That matches the previous behavior, which looks like an evaluation or
    debugging state. Confirm whether the labels should be returned before
    changing the result.

    Returns:
        Always ``[]``.
    """
    if text is None:
        return []
    try:
        tag_pipes = (
            ("tag_gen_pipe_1", tag_gen_pipe_1),
            ("tag_gen_pipe_2", tag_gen_pipe_2),
            ("tag_gen_pipe_3", tag_gen_pipe_3),
        )
        for name, tag_pipe in tag_pipes:
            # Run each model once and reuse the output for both log fields.
            predictions = tag_pipe(text)
            kept = [
                tag["label"]
                for tag in predictions
                if tag["score"] >= MinTagScore
            ]
            _logger.debug("%s raw=%s kept=%s", name, predictions, kept)
    except Exception:
        _logger.exception("Tag generation failed")
    return []


def get_classification(text: str):
    """Return the stripped labels that classification_pipe assigns to *text*.

    Only labels scoring strictly above 0.75 are kept. Returns ``[]`` when
    *text* is ``None`` or when classification fails.
    """
    if text is None:
        return []
    try:
        result = classification_pipe(text)
        # Normalize the single-dict shape to a list so one path handles both.
        predictions = result if isinstance(result, list) else [result]
        return [
            tag["label"].strip()
            for tag in predictions
            if tag["score"] > _MIN_CLASSIFICATION_SCORE
        ]
    except Exception:
        _logger.exception("Classification failed")
        return []