import torch
from transformers import GPTNeoForCausalLM, GPT2Tokenizer

# Load the pre-trained GPT-Neo model and tokenizer (swap in a smaller or larger checkpoint if desired)
model = GPTNeoForCausalLM.from_pretrained('EleutherAI/gpt-neo-1.3B')
tokenizer = GPT2Tokenizer.from_pretrained('EleutherAI/gpt-neo-1.3B')

# Load the text dataset
with open('text.txt', 'r') as f:
    text = f.read()

# Tokenize the text dataset, truncating to the model's 2048-token context window
encoding = tokenizer.encode(text, return_tensors='pt', truncation=True, max_length=2048)

# Fine-tune the model on the text dataset (labels = input_ids gives the causal LM loss)
model.train()
optimizer = torch.optim.Adam(model.parameters(), lr=5e-5)
for i in range(100):
    outputs = model(input_ids=encoding, labels=encoding)
    loss = outputs.loss
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()
    print(f'Epoch {i+1}, Loss: {loss.item()}')

# Save the fine-tuned model and tokenizer
model.save_pretrained('fine-tuned-gpt-neo')
tokenizer.save_pretrained('fine-tuned-gpt-neo')
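
# A minimal usage sketch (not part of the original script): reload the saved
# checkpoint and generate a short continuation. The prompt text and generation
# settings below are illustrative assumptions, not fixed choices.
fine_tuned_model = GPTNeoForCausalLM.from_pretrained('fine-tuned-gpt-neo')
fine_tuned_tokenizer = GPT2Tokenizer.from_pretrained('fine-tuned-gpt-neo')
prompt_ids = fine_tuned_tokenizer.encode('Once upon a time', return_tensors='pt')
generated = fine_tuned_model.generate(prompt_ids, max_length=50, do_sample=True)
print(fine_tuned_tokenizer.decode(generated[0], skip_special_tokens=True))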