import logging
from typing import Optional

from langdetect import detect
from transformers import pipeline

from utils.tag_utils import filter_tags

logger = logging.getLogger(__name__)

# Version number stamped into every dict returned by summarize().
AiSummaryVersion = 3
# Minimum classifier score a label needs in order to be kept as a tag.
MinTagScore = 0.7

# The pipelines are constructed once, at import time, and reused by every call.
summarization_pipeline = pipeline(
    "summarization", model="Falconsai/text_summarization"
)
en_translation_pipe = pipeline(
    "translation", model="Helsinki-NLP/opus-mt-mul-en"
)
tag_gen_pipe_1 = pipeline(
    "text-classification", model="dima806/news-category-classifier-distilbert"
)
tag_gen_pipe_2 = pipeline(
    "text-classification", model="elozano/bert-base-cased-news-category"
)


def summarize(id: str, text: str) -> dict:
    """Build the summary/tags record for one piece of text.

    Args:
        id: Identifier copied into the result under ``"id"``.
        text: Source text; may be ``None``.

    Returns:
        ``{"ver": AiSummaryVersion}`` only, when ``text`` is ``None`` or
        shorter than 10 characters. Otherwise a dict with the keys ``"id"``,
        ``"ver"``, ``"summary"`` and ``"tags"``. ``"summary"`` is the model
        summary for texts longer than 3000 characters (``None`` if
        summarization failed) and the unmodified text otherwise. ``"tags"``
        is a sorted, de-duplicated list.
    """
    if text is None or len(text) < 10:
        return {"ver": AiSummaryVersion}

    # Only long texts are summarized; shorter ones are used verbatim.
    summary = get_summarization(text) if len(text) > 3000 else text

    # Tags are computed on the English rendering of the summary.
    translated = get_en_translation(summary)
    tags = sorted(set(filter_tags(get_tags(translated, id))))

    return {
        "id": id,
        "ver": AiSummaryVersion,
        "summary": summary,
        "tags": tags,
    }


def get_summarization(text: str) -> Optional[str]:
    """Return a summary of ``text``, or ``None`` if the pipeline fails."""
    try:
        # max_length / min_length bound the generated summary length in
        # tokens (not words).
        result = summarization_pipeline(
            text, max_length=500, min_length=100, do_sample=False
        )
        first = result[0] if isinstance(result, list) else result
        return first['summary_text']
    except Exception:
        # Best effort: the caller treats None as "no summary available".
        logger.exception("Summarization failed")
        return None


def get_en_translation(text: str) -> Optional[str]:
    """Return ``text`` in English.

    Text already detected as English is returned unchanged. Returns ``None``
    when ``text`` is ``None`` or when translation fails.
    """
    if text is None:
        return None
    try:
        if is_english(text):
            return text
        result = en_translation_pipe(text)
        first = result[0] if isinstance(result, list) else result
        return first['translation_text']
    except Exception:
        # Best effort: the caller treats None as "no translation available".
        logger.exception("Translation to English failed")
        return None


def is_english(text) -> bool:
    """Return ``True`` if langdetect reports ``text`` as ``'en'``.

    Any detection error is treated as "not English".
    """
    try:
        return detect(text) == 'en'
    except Exception:
        return False


def _confident_labels(classifier, text: str) -> list:
    """Return the labels from ``classifier`` scoring at least MinTagScore."""
    # truncation=True clips inputs longer than the model's maximum sequence
    # length instead of letting the classifier raise on them.
    predictions = classifier(text, truncation=True)
    return [p['label'] for p in predictions if p['score'] >= MinTagScore]


def get_tags(text: str, id: str) -> list:
    """Collect category labels for ``text`` from both tag classifiers.

    Args:
        text: Text to classify; ``None`` yields an empty list.
        id: Unused; kept so existing callers keep working.

    Returns:
        Labels from the first classifier followed by labels from the second
        (duplicates possible), or ``[]`` if classification fails.
    """
    if text is None:
        return []
    try:
        return (
            _confident_labels(tag_gen_pipe_1, text)
            + _confident_labels(tag_gen_pipe_2, text)
        )
    except Exception:
        # Best effort: a classifier failure yields no tags rather than a crash.
        logger.exception("Tag generation failed")
        return []