---
language:
- ar
metrics:
- accuracy
- bleu
library_name: transformers
pipeline_tag: text2text-generation
---
|
This model is experimental and still under evaluation.
|
|
|
The number in the generated text represents the news category, according to the following mapping:

```python
category_mapping = {
    'Political': 1,
    'Economy': 2,
    'Health': 3,
    'Sport': 4,
    'Culture': 5,
    'Technology': 6,
    'Art': 7,
    'Accidents': 8,
}
```
|
|
|
![image/png](https://cdn-uploads.huggingface.co/production/uploads/645817bb72b60ae7a37f8f40/6gZDjcAOhWLvN5xF-E2FE.png) |
|
|
|
# Example usage

```python
from transformers import T5ForConditionalGeneration, T5Tokenizer, pipeline
from arabert.preprocess import ArabertPreprocessor

arabert_prep = ArabertPreprocessor(model_name="aubmindlab/bert-base-arabertv2")
model_name = "Hezam/arabic-T5-news-classification-generation"
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)
generation_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)

# Maximum length of the generated output, in tokens.
# NOTE: 128 is a placeholder value; adjust it to suit your inputs.
generation_len = 128

text = "عدم التهاون في تحقيق الاحلام"
text_clean = arabert_prep.preprocess(text)

g = generation_pipeline(
    text_clean,
    num_beams=10,
    max_length=generation_len,
    top_p=0.9,
    repetition_penalty=3.0,
    no_repeat_ngram_size=3,
)[0]["generated_text"]
```
|
|
|
|