Azerbaijani / app.py
BusinessDev's picture
more Storage
a592fa2
raw
history blame
2.09 kB
from transformers import MBartForConditionalGeneration, MBart50Tokenizer
import dat
import os
import platform
def setvar():
    """On Windows, point the Transformers cache at %LOCALAPPDATA%\\transformers_cache.

    On non-Windows systems the library default cache location is left alone.
    Creates the directory on first launch and exports TRANSFORMERS_CACHE.
    Returns None.
    """
    if platform.system() != "Windows":
        print("Windows not detected. Assignment of Transformers cache directory not necessary.")
        return

    local_appdata = os.getenv('LOCALAPPDATA')
    if not local_appdata:
        # Original code passed os.getenv(...) straight into os.path.join,
        # which raises TypeError when LOCALAPPDATA is unset. Fall back to
        # the library default cache instead of crashing.
        print("LOCALAPPDATA is not set; keeping the default Transformers cache location.")
        return

    print("Windows detected. Assigning cache directory to Transformers in AppData\\Local.")
    transformers_cache_directory = os.path.join(local_appdata, 'transformers_cache')
    if not os.path.exists(transformers_cache_directory):
        try:
            # makedirs also creates missing parent directories, unlike os.mkdir.
            os.makedirs(transformers_cache_directory)
            print(f"First launch. Directory '{transformers_cache_directory}' created successfully.")
        except OSError as e:
            print(f"Error creating directory '{transformers_cache_directory}': {e}")
    else:
        print(f"Directory '{transformers_cache_directory}' already exists.")
    # NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers
    # releases in favour of HF_HOME, but it is still honoured — kept for
    # backward compatibility with the rest of this script.
    os.environ['TRANSFORMERS_CACHE'] = transformers_cache_directory
    print("Environment variable assigned.")
# Load the model and tokenizer
# Azerbaijani extractive-QA mBART model; inputs are tagged as English
# (src_lang="en_XX") and generation targets Azerbaijani (tgt_lang="az_AZ").
# NOTE: from_pretrained downloads the weights on first run, so importing
# this module performs network I/O and can take a while.
model_name = "LocalDoc/mbart_large_qa_azerbaijan"
tokenizer = MBart50Tokenizer.from_pretrained(model_name, src_lang="en_XX", tgt_lang="az_AZ")
model = MBartForConditionalGeneration.from_pretrained(model_name)
def answer_question(context, question):
    """Generate an Azerbaijani answer to *question* from *context*.

    Parameters:
        context (str): passage of text the answer should be drawn from.
        question (str): question about the passage.

    Returns:
        str: the decoded answer with special tokens removed.
    """
    # Prepare input text in the "context: ... question: ..." format the
    # fine-tuned model expects.
    input_text = f"context: {context} question: {question}"
    # mBART-50 supports at most 1024 input positions.  The original call used
    # max_length=5_120_000 together with padding="max_length", which would try
    # to build a ~5M-token padded tensor (OOM) and still overflow the model's
    # position embeddings.  Truncate to the model limit instead; a single
    # sequence needs no padding at all.
    inputs = tokenizer(input_text, return_tensors="pt", max_length=1024, truncation=True)
    # Generate the answer with beam search; early_stopping ends each beam at
    # EOS, and 256 tokens is ample headroom for a short extractive answer.
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_length=256,
        num_beams=5,
        early_stopping=True
    )
    # Decode the answer
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return answer
# Example usage
# Runs unconditionally at import time (no __main__ guard).
# `dat.data` presumably holds the passage text to query — TODO confirm what
# the `dat` module exports; it is not visible in this file.
context = dat.data
# Azerbaijani: "Is it forbidden to film a citizen without their permission?"
question = "Vətəndaşın icazəsi olmadan videosunu çəkmək qadağandır?"
answer = answer_question(context, question)
print(answer)