# Hugging Face Spaces status at capture time: Sleeping
from transformers import MBartForConditionalGeneration, MBart50Tokenizer | |
# Cache of (tokenizer, model) pairs keyed by checkpoint name, so the large
# pretrained weights are loaded once per process instead of on every call.
_MBART_CACHE = {}


def _load_mbart(model_name):
    """Return the cached ``(tokenizer, model)`` pair for *model_name*."""
    if model_name not in _MBART_CACHE:
        tokenizer = MBart50Tokenizer.from_pretrained(model_name)
        model = MBartForConditionalGeneration.from_pretrained(model_name)
        _MBART_CACHE[model_name] = (tokenizer, model)
    return _MBART_CACHE[model_name]


def summarize_text(text, max_length=150, min_length=30, num_beams=4):
    """Generate a summary of *text* with an mBART-50 checkpoint.

    Args:
        text: The text to summarize. ``None`` or a blank string yields ``""``
            without loading the model.
        max_length: Upper bound on the generated sequence length, in tokens.
            Coerced with ``int()``.
        min_length: Lower bound on the generated sequence length, in tokens.
            Coerced with ``int()``.
        num_beams: Beam width used for beam search. Coerced with ``int()``.

    Returns:
        The decoded summary string with special tokens removed.

    Raises:
        ValueError: If a length/beam argument cannot be converted by ``int()``.
        TypeError: If a length/beam argument is of a type ``int()`` rejects.
    """
    # Callers may hand these over as floats or numeric strings; generate()
    # needs plain integers.
    max_length = int(max_length)
    min_length = int(min_length)
    num_beams = int(num_beams)

    # Nothing to summarize: skip the expensive model load and avoid generating
    # text from an input that contains only special tokens.
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""

    # NOTE(review): the checkpoint name ("many-to-many-mmt") suggests a
    # translation model, and neither a source language nor a forced target
    # language token is set here, so the output language is left to the
    # library defaults -- confirm this is the intended model for summaries.
    model_name = "facebook/mbart-large-50-many-to-many-mmt"
    tokenizer, model = _load_mbart(model_name)

    # Inputs longer than 1024 tokens are truncated.
    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True)

    # Pass the attention mask together with the input ids so generate() does
    # not have to infer it.
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=max_length,
        min_length=min_length,
        num_beams=num_beams,
        length_penalty=2.0,
        early_stopping=True,
    )

    # Only the first (and, for a single input string, only) sequence is decoded.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
# Lazily created summarization pipeline, shared by all calls so the model is
# loaded once per process rather than on every request.
_SIMPLE_SUMMARIZER = None


def _get_simple_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _SIMPLE_SUMMARIZER
    if _SIMPLE_SUMMARIZER is None:
        # Imported here, as in the original, so the pipeline machinery is only
        # pulled in when this code path is actually used.
        from transformers import pipeline

        _SIMPLE_SUMMARIZER = pipeline(
            "summarization", model="Falconsai/text_summarization"
        )
    return _SIMPLE_SUMMARIZER


# Simple summarization function
def simple_summarize_text(text):
    """Summarize *text* with the ``Falconsai/text_summarization`` pipeline.

    Args:
        text: The text to summarize. ``None`` or a blank string yields ``""``
            without loading the pipeline.

    Returns:
        The ``summary_text`` field of the first pipeline result. Generation is
        requested with ``max_length=50``, ``min_length=30`` and sampling
        disabled.
    """
    # Nothing to summarize: avoid loading the model for an empty request.
    if text is None or (isinstance(text, str) and not text.strip()):
        return ""

    summarizer = _get_simple_summarizer()
    results = summarizer(text, max_length=50, min_length=30, do_sample=False)
    return results[0]["summary_text"]
# Example text to summarize
#user_text = 'Cat o sa mai astept sa imi deblocati cartela ca nu pot vorbi in Spania si toti prietenii mei asteapta sa ii sun de sarbatori. Deci cand rezolvati problema mea cu cartela?'
#model_name = "facebook/mbart-large-cc25"
def example_summarize_text(model_name, text):
    """Summarize *text* with ``summarize_text`` and print the result.

    Args:
        model_name: Accepted for backward compatibility with existing callers
            but not used: ``summarize_text`` has no model-name parameter.
        text: The text to summarize.

    Returns:
        None. The summary is printed to standard output, prefixed with
        ``"Summary:"``.
    """
    # summarize_text's signature is (text, max_length, min_length, num_beams).
    # The previous call passed model_name as the text and the text as
    # max_length, which then failed inside int(max_length).
    summary = summarize_text(text)
    print("Summary:", summary)
#example_summarize_text(model_name, user_text)
#simple_summarize_text(user_text)