Spaces:
Runtime error
Runtime error
File size: 2,320 Bytes
cdc5783 e2ec8e0 090f2d4 cdc5783 e2ec8e0 cdc5783 a680719 cdc5783 fd9b289 858ef45 1f6b7aa 2a78aa3 cdc5783 a680719 cdc5783 a680719 cdc5783 e2ec8e0 cdc5783 e2ec8e0 cdc5783 e2ec8e0 cdc5783 858ef45 cdc5783 3784e1c 858ef45 1f6b7aa e2ec8e0 cdc5783 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import logging
import re

from langdetect import detect
from transformers import pipeline

from utils.tag_utils import filter_tags
# Version number written into the "ver" field of every dict returned by summarize().
AiSummaryVersion = 2
# Minimum classifier score a label must reach before get_tags() keeps it.
MinTagScore = 0.7
# The pipelines below are constructed once, when this module is imported, and
# reused by the helper functions.
# Used by get_summarization().
summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum")
# Used by get_en_translation() for text that is_english() does not report as English.
en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
# Three independent text classifiers; get_tags() concatenates the labels they produce.
tag_gen_pipe_1 = pipeline("text-classification", model="yiyanghkust/finbert-esg-9-categories")
tag_gen_pipe_2 = pipeline("text-classification", model="dima806/news-category-classifier-distilbert")
tag_gen_pipe_3 = pipeline("text-classification", model="elozano/bert-base-cased-news-category")
def summarize(id: str, text: str):
    """Build the summary/tag record for one piece of text.

    Args:
        id: Caller-supplied identifier. It is echoed back in the result and
            forwarded to get_tags(). The name shadows the builtin but is part
            of the public signature, so it is kept.
        text: Raw text to summarise and tag.

    Returns:
        ``{"ver": AiSummaryVersion}`` only, when ``text`` is ``None`` or
        shorter than 10 characters. Otherwise a dict with the keys ``"id"``,
        ``"ver"``, ``"summary"`` and ``"tags"``:

        * ``summary`` is ``text`` itself when it is at most 100 characters
          long, else the result of get_summarization() (which may be
          ``None`` if summarisation failed).
        * ``tags`` is the sorted, de-duplicated list of labels left after
          filter_tags().
    """
    if text is None or len(text) < 10:
        return {"ver": AiSummaryVersion}

    # Short inputs are used verbatim as their own summary.
    summary = get_summarization(text) if len(text) > 100 else text

    # Tags are derived from the English rendering of the full text, not from
    # the summary.
    translated = get_en_translation(text)
    tags = filter_tags(get_tags(translated, id))

    return {
        "id": id,
        "ver": AiSummaryVersion,
        "summary": summary,
        # sorted() accepts any iterable, so the set needs no list() wrapper.
        "tags": sorted(set(tags)),
    }
def get_summarization(text: str):
    """Summarise ``text`` with the module-level ``summarization_pipeline``.

    Args:
        text: Text to summarise.

    Returns:
        The ``'summary_text'`` value from the pipeline output (taken from the
        first element when the output is a list), or ``None`` if the pipeline
        raised or its output did not have the expected shape.
    """
    try:
        result = summarization_pipeline(text)
        first = result[0] if isinstance(result, list) else result
        return first['summary_text']
    except Exception:
        # Best-effort: the caller treats None as "no summary". Catch Exception
        # rather than everything so Ctrl-C / SystemExit still propagate, and
        # record the traceback instead of failing silently.
        logging.getLogger(__name__).exception("Summarization failed")
        return None
def get_en_translation(text: str):
    """Return an English version of ``text``.

    Args:
        text: Text in any language, or ``None``.

    Returns:
        ``None`` when ``text`` is ``None``; ``text`` unchanged when
        is_english() reports it as English; otherwise the
        ``'translation_text'`` value produced by ``en_translation_pipe``
        (taken from the first element when the output is a list). ``None`` is
        also returned if detection or translation raised.
    """
    if text is None:
        return None
    try:
        if is_english(text):
            return text
        result = en_translation_pipe(text)
        first = result[0] if isinstance(result, list) else result
        return first['translation_text']
    except Exception:
        # Best-effort: callers treat None as "no translation available".
        # Exception (not a bare except) lets Ctrl-C / SystemExit through, and
        # the traceback is logged so failures are no longer invisible.
        logging.getLogger(__name__).exception("Translation to English failed")
        return None
def is_english(text):
    """Report whether ``detect`` identifies ``text`` as English.

    Args:
        text: Text to inspect.

    Returns:
        ``True`` when ``detect(text)`` returns ``'en'``; ``False`` for any
        other language code or when detection raises.
    """
    try:
        return detect(text) == 'en'
    except Exception:
        # A failed detection is treated as "not English". Catching Exception
        # instead of using a bare except keeps KeyboardInterrupt / SystemExit
        # from being swallowed here.
        return False
def get_tags(text: str, id: str):
    """Collect classifier labels for ``text`` from the three tag pipelines.

    Args:
        text: Text to classify, or ``None``.
        id: Identifier of the item being tagged; used only in log messages.

    Returns:
        The labels whose score is at least ``MinTagScore``, in pipeline order
        (``tag_gen_pipe_1``, then ``_2``, then ``_3``). Duplicates are not
        removed here. An empty list is returned when ``text`` is ``None`` or
        when any of the classifiers raises.
    """
    if text is None:
        return []

    log = logging.getLogger(__name__)
    try:
        labels_per_model = [
            [tag['label'] for tag in classify(text) if tag['score'] >= MinTagScore]
            for classify in (tag_gen_pipe_1, tag_gen_pipe_2, tag_gen_pipe_3)
        ]
    except Exception:
        # All-or-nothing, as before: a failure in any classifier yields no
        # tags. Exception (not a bare except) lets Ctrl-C / SystemExit
        # propagate, and the traceback is logged rather than dropped.
        log.exception("Tag generation failed for id=%s", id)
        return []

    # Replaces the leftover debug print() calls; emitted only when debug
    # logging is enabled, so full texts are no longer dumped to stdout.
    log.debug("Tags for id=%s: %s (text=%r)", id, labels_per_model, text)
    return [label for labels in labels_per_model for label in labels]
|