# txt_finetune / gpt3-pretrained
# SamiKoen's picture
# Update gpt3-pretrained
# d413d16
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import LineByLineTextDataset, DataCollatorForLanguageModeling, Trainer, TrainingArguments
import openai
import os
# Endpoint and credentials for the OpenAI chat-completions API. Nothing in the
# visible script sends a request to it; kept in case other code relies on them.
API_URL = "https://api.openai.com/v1/chat/completions"
openai.api_key = os.environ["OPENAI_API_KEY"]  # raises KeyError when unset

# "gpt-3.5-turbo" is an OpenAI API model id with no downloadable weights, so
# `from_pretrained` cannot resolve it. Fine-tune an openly available causal LM
# checkpoint instead.
# NOTE(review): "gpt2" is a stand-in; confirm the checkpoint you actually want.
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# GPT-2 ships without a padding token, but batching variable-length lines
# requires one; reuse the end-of-sequence token for padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# NOTE(review): this is a relative path that looks like a mistyped macOS
# absolute path -- confirm the intended location of the training file.
filename = "machintoshhd/users/izerkoen/downloads/content.txt"

# Raw file contents. The training pipeline below re-reads the file itself, so
# `text` is not consumed by it.
with open(filename, "r", encoding="utf-8") as f:
    text = f.read()

# Build the training set from the lines of `filename`, with block_size=128.
# `tokenizer` is the first positional parameter of LineByLineTextDataset, so
# the path has to be passed as `file_path=`; passing it positionally collides
# with the `tokenizer=` keyword and raises TypeError. The dataset is built from
# the same file that was opened above, so only one path has to exist.
train_dataset = LineByLineTextDataset(
    tokenizer=tokenizer,
    file_path=filename,
    block_size=128,
)

# mlm=False selects causal (next-token) language modelling rather than
# masked-token prediction.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
# Run configuration for fine-tuning: where checkpoints go, how long to train,
# and how often / how many checkpoints to keep.
training_args = TrainingArguments(
    output_dir="./results",
    overwrite_output_dir=True,
    per_device_train_batch_size=16,
    num_train_epochs=3,
    prediction_loss_only=True,
    save_total_limit=2,
    save_steps=1000,
)

# Wire the model, the run configuration and the data pipeline together.
trainer = Trainer(
    args=training_args,
    model=model,
    train_dataset=train_dataset,
    data_collator=data_collator,
)
# Fine-tune the model on the training dataset.
trainer.train()

# Training leaves the model in train mode; switch to eval mode so dropout is
# disabled while sampling.
model.eval()

# Sample a continuation of a fixed prompt as a quick smoke test of the result.
input_text = "sen nasil bir trek asistanisin?"
# The Trainer may have moved the model to an accelerator, so place the prompt
# tensor on the model's device to avoid a device-mismatch error in generate().
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
output = model.generate(input_ids, max_length=50, do_sample=True)
generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
print(generated_text)