Spaces:
Runtime error
Runtime error
File size: 1,908 Bytes
cdc5783 02f8d21 1d4052e cdc5783 02f8d21 cdc5783 a680719 a45c805 cdc5783 1d4052e 2c41d85 ed04ca3 2a78aa3 1d4052e cdc5783 a680719 cdc5783 a680719 cdc5783 02f8d21 cdc5783 e2ec8e0 cdc5783 e2ec8e0 cdc5783 e2ec8e0 cdc5783 02f8d21 cdc5783 02f8d21 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import logging

from langdetect import detect
from transformers import pipeline

from utils.tag_utils import filter_tags
# Version stamp written under the "ver" key of every dict returned by summarize().
AiSummaryVersion = 4
# The three pipelines below are constructed once, at import time, and shared by
# the helper functions in this module.
# Summarization model; used by get_summarization().
summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum", max_length=512)
# Translation model; used by get_en_translation() for text not detected as English.
# NOTE(review): model name suggests multilingual -> English; confirm against the model card.
en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
# Text-to-tags generator; used by get_tags().
# NOTE(review): 'models/text2tags' looks like a local path rather than a hub id; confirm it ships with the app.
text_to_tags_pipe = pipeline('text2text-generation', model='models/text2tags')
def summarize(id: str, text: str):
    """Build the summary/tags record for one piece of text.

    Args:
        id: Identifier of the item; printed and echoed back under ``"id"``.
        text: The raw text to process.

    Returns:
        ``{"ver": AiSummaryVersion}`` when *text* is ``None`` or shorter than
        10 characters. Otherwise a dict with the keys ``"id"``, ``"ver"``,
        ``"summary"`` and ``"tags"``, where ``"tags"`` is a sorted list of
        unique tags. ``"summary"`` is whatever ``get_summarization`` returned
        for texts longer than 1000 characters, and *text* itself otherwise.
    """
    print('=================')
    print(id)

    # Nothing useful can be produced from missing or very short input.
    too_short = text is None or len(text) < 10
    if too_short:
        return {"ver": AiSummaryVersion}

    # Only long texts go through the summarizer; short ones are their own summary.
    if len(text) > 1000:
        summary = get_summarization(text)
    else:
        summary = text

    # Tags are generated from the English rendering of the summary,
    # then de-duplicated and put in a stable order.
    english_text = get_en_translation(summary)
    tags = sorted(set(get_tags(english_text)))

    print(summary)
    print(tags)

    return {
        "id": id,
        "ver": AiSummaryVersion,
        "summary": summary,
        "tags": tags,
    }
def get_summarization(text: str):
    """Summarize *text* with the module-level ``summarization_pipeline``.

    Args:
        text: The text to summarize.

    Returns:
        The ``'summary_text'`` value produced by the pipeline, or ``None`` if
        calling the pipeline or reading its result raised an exception.
    """
    try:
        result = summarization_pipeline(text)
        # The result is accepted either as a list of dicts or as a single dict.
        first = result[0] if isinstance(result, list) else result
        return first['summary_text']
    except Exception:
        # Best-effort: a failed summarization yields None instead of crashing
        # the caller. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate, and the traceback is
        # logged so failures are no longer invisible.
        logging.getLogger(__name__).exception("Summarization failed")
        return None
def get_en_translation(text: str):
    """Return an English version of *text*.

    Args:
        text: The text to translate; may be ``None``.

    Returns:
        ``None`` when *text* is ``None``; *text* unchanged when
        ``is_english`` reports it as English; otherwise the
        ``'translation_text'`` value produced by ``en_translation_pipe``.
        ``None`` is also returned if detection or translation raised an
        exception.
    """
    if text is None:
        return None
    try:
        # Skip the translation model entirely for text that is already English.
        if is_english(text):
            return text
        result = en_translation_pipe(text)
        # The result is accepted either as a list of dicts or as a single dict.
        first = result[0] if isinstance(result, list) else result
        return first['translation_text']
    except Exception:
        # Best-effort: a failed translation yields None instead of crashing
        # the caller. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate, and the traceback is
        # logged so failures are no longer invisible.
        logging.getLogger(__name__).exception("Translation to English failed")
        return None
def is_english(text):
    """Report whether ``detect`` classifies *text* as English.

    Args:
        text: The text to classify.

    Returns:
        ``True`` when ``detect(text)`` returns ``'en'``; ``False`` for any
        other language code or when detection raises an exception.
    """
    try:
        return detect(text) == 'en'
    except Exception:
        # A detection failure is treated as "not English" rather than an
        # error. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        return False
def get_tags(text: str):
    """Generate a list of tags for *text* with ``text_to_tags_pipe``.

    Args:
        text: The text to tag; may be ``None``.

    Returns:
        A list of non-empty, whitespace-stripped tags obtained by splitting
        the pipeline's ``'generated_text'`` on commas. An empty list is
        returned when *text* is ``None`` or when generation or parsing raised
        an exception.
    """
    if text is None:
        return []
    try:
        result = text_to_tags_pipe(text)
        # The result is accepted either as a list of dicts or as a single dict.
        first = result[0] if isinstance(result, list) else result
        tag_str = first['generated_text']
        # Splitting on ',' yields '' for trailing/doubled commas or an empty
        # generation; those are dropped so no blank tag is ever returned.
        stripped = (part.strip() for part in tag_str.split(','))
        return [tag for tag in stripped if tag]
    except Exception:
        # Best-effort: a failed tag generation yields no tags instead of
        # crashing the caller. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate, and the traceback is
        # logged so failures are no longer invisible.
        logging.getLogger(__name__).exception("Tag generation failed")
        return []
|