Spaces:
Runtime error
Runtime error
File size: 1,829 Bytes
cdc5783 1c82268 d9a5c81 cdc5783 5513f06 cdc5783 a680719 cdc5783 1c82268 2c41d85 ed04ca3 2a78aa3 a680719 cdc5783 a680719 cdc5783 02f8d21 cdc5783 e2ec8e0 cdc5783 e2ec8e0 cdc5783 e2ec8e0 cdc5783 02f8d21 cdc5783 02f8d21 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import logging

from langdetect import detect
from transformers import pipeline
# Version number written into the "ver" field of every dict returned by summarize().
AiSummaryVersion = 1
# The three pipelines below are constructed once, when this module is imported,
# and are reused by the helper functions further down.
# Summarization pipeline, configured with max_length=512 and min_length=50.
summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum", max_length=512, min_length=50)
# Translation pipeline used by get_en_translation() for text not detected as English.
# NOTE(review): the model name suggests many-languages-to-English — confirm against the model card.
en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
# Text-to-text pipeline whose generated text get_tags() splits on commas; max_length=64.
text_to_tags_pipe = pipeline('text2text-generation', model='PageOrg/t5-small-tagging-text', max_length=64)
def summarize(id: str, text: str):
    """Build the summary record for one piece of text.

    Args:
        id: Identifier copied unchanged into the result.
        text: Text to process; may be ``None``.

    Returns:
        ``{"ver": AiSummaryVersion}`` when *text* is ``None`` or shorter than
        10 characters. Otherwise a dict with the keys ``"id"``, ``"ver"``,
        ``"summary"`` and ``"tags"``, where ``"tags"`` is a sorted list of
        unique tags. ``"summary"`` is the text itself unless it is longer
        than 2000 characters, in which case it is the result of
        ``get_summarization`` (which may be ``None``).
    """
    # Nothing worth processing: report only the version.
    if text is None or len(text) < 10:
        return {"ver": AiSummaryVersion}

    # Only texts longer than 2000 characters are condensed; shorter ones are
    # used verbatim as their own summary.
    if len(text) > 2000:
        summary = get_summarization(text)
    else:
        summary = text

    # Tags are generated from the English rendering of the summary.
    english_summary = get_en_translation(summary)
    unique_tags = sorted(set(get_tags(english_summary)))

    return {
        "id": id,
        "ver": AiSummaryVersion,
        "summary": summary,
        "tags": unique_tags,
    }
def get_summarization(text: str):
    """Summarize *text* with the module-level summarization pipeline.

    Args:
        text: Text to summarize.

    Returns:
        The ``'summary_text'`` string produced by the pipeline, or ``None``
        if the pipeline call or result extraction fails (best effort).
    """
    try:
        result = summarization_pipeline(text)
        # The pipeline result is either a list of dicts or a single dict.
        first = result[0] if isinstance(result, list) else result
        return first['summary_text']
    except Exception:
        # Best effort: keep returning None, but record why. ``Exception``
        # (rather than a bare except) lets KeyboardInterrupt/SystemExit through.
        logging.getLogger(__name__).exception("Summarization failed")
        return None
def get_en_translation(text: str):
    """Return *text* in English, translating it only when necessary.

    Args:
        text: Text to translate; may be ``None``.

    Returns:
        ``None`` when *text* is ``None``; *text* unchanged when
        ``is_english`` reports it as English; otherwise the
        ``'translation_text'`` string produced by the translation pipeline,
        or ``None`` if translation fails (best effort).
    """
    if text is None:
        return None

    # Skip the translation model entirely for text already in English.
    if is_english(text):
        return text

    try:
        result = en_translation_pipe(text)
        # The pipeline result is either a list of dicts or a single dict.
        first = result[0] if isinstance(result, list) else result
        return first['translation_text']
    except Exception:
        # Best effort: keep returning None, but record why. ``Exception``
        # (rather than a bare except) lets KeyboardInterrupt/SystemExit through.
        logging.getLogger(__name__).exception("Translation to English failed")
        return None
def is_english(text):
    """Return ``True`` when language detection reports *text* as English.

    Any error raised by the detector is treated as "not English" and yields
    ``False``.
    """
    try:
        return detect(text) == 'en'
    except Exception:
        # Detection failure means "not English". ``Exception`` (rather than a
        # bare except) lets KeyboardInterrupt/SystemExit propagate.
        return False
def get_tags(text: str):
    """Generate a list of tags for *text* with the tagging pipeline.

    Args:
        text: Text to tag; may be ``None``.

    Returns:
        A list of non-empty, whitespace-stripped tags obtained by splitting
        the pipeline's ``'generated_text'`` on commas. Returns ``[]`` when
        *text* is ``None`` or when tagging fails (best effort).
    """
    if text is None:
        return []

    try:
        result = text_to_tags_pipe(text)
        # The pipeline result is either a list of dicts or a single dict.
        first = result[0] if isinstance(result, list) else result
        tag_str = first['generated_text']
    except Exception:
        # Best effort: keep returning [], but record why. ``Exception``
        # (rather than a bare except) lets KeyboardInterrupt/SystemExit through.
        logging.getLogger(__name__).exception("Tag generation failed")
        return []

    # Drop blank entries: empty output, doubled commas or a trailing comma
    # would otherwise yield '' as a tag.
    stripped = (piece.strip() for piece in tag_str.split(','))
    return [tag for tag in stripped if tag]
|