# GradioMistral / app.py
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
import spaces
import gradio as gr
# === 1️⃣ LOAD MODEL AND TOKENIZER ===
MODEL_NAME = "mistralai/Mistral-7B-v0.1"  # Hugging Face model ID
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
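# Mistral's tokenizer ships without a pad token, and the data collator used for
# training below needs one to pad batches. Reusing EOS as the pad token is a
# common convention (an assumption here; the original script did not set one).
tokenizer.pad_token = tokenizer.eos_token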
# === 2️⃣ CPU OPTIMIZATION & ZeroGPU CHECK ===
# On ZeroGPU Spaces, CUDA is only attached inside functions decorated with @spaces.GPU.
zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu' outside a @spaces.GPU call 🤔

@spaces.GPU
def greet(n):
    print(zero.device)  # <-- 'cuda:0' inside the GPU context 🤗
    return f"Hello {zero + n} Tensor"
device = "cpu" # CPU kullanıyoruz
torch_dtype = torch.float32 # float32 seçtik çünkü CPU'da bf16 genelde yok
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch_dtype)
# === 3️⃣ LoRA SETTINGS ===
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    target_modules=["q_proj", "v_proj"],
)
model = get_peft_model(model, lora_config)
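# A quick sanity check of the adapter footprint: peft's
# print_trainable_parameters() reports how few weights LoRA actually trains
# (r=8 on q_proj/v_proj leaves roughly a few million of the 7B parameters
# trainable, well under 1%).
model.print_trainable_parameters()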
# === 4️⃣ DATASET ===
dataset = load_dataset("oscar", "unshuffled_deduplicated_tr", trust_remote_code=True)  # 🔥 FIX: trust_remote_code=True
train_data = dataset["train"].shuffle(seed=42).select(range(10000))  # small subset
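# Sanity check (not part of training): OSCAR rows carry the raw Turkish text
# under the "text" key, which the tokenize function below consumes.
print(train_data[0]["text"][:200])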
# === 5️⃣ TOKENIZATION FUNCTION ===
def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, max_length=512)

tokenized_datasets = train_data.map(tokenize_function, batched=True, remove_columns=train_data.column_names)  # drop raw columns, keep only token fields
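# Optional probe: confirm truncation capped every sequence at max_length=512.
print(max(len(ids) for ids in tokenized_datasets["input_ids"]))  # <= 512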
# === 6️⃣ TRAINING ARGUMENTS ===
training_args = TrainingArguments(
    output_dir="./mistral_lora_cpu",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=16,
    learning_rate=5e-4,
    num_train_epochs=1,
    save_steps=500,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=10,
    optim="adamw_torch",  # 🔥 FIX: adamw_torch instead of bitsandbytes
)
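# Effective batch size = per_device_train_batch_size * gradient_accumulation_steps
# = 1 * 16 = 16 sequences per optimizer step, which keeps CPU memory use low.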
# === 7️⃣ MODEL TRAINING ===
# DataCollatorForLanguageModeling with mlm=False pads each batch and copies
# input_ids into labels, which causal-LM training requires (the original
# passed no collator, so Trainer would fail for lack of labels).
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets,
    data_collator=data_collator,
)
trainer.train()
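# After training, a minimal sketch (assumptions: the adapter path and UI labels
# are illustrative, not from the original) of persisting the LoRA adapter and
# wiring the otherwise-unused gradio import into the Space UI:
model.save_pretrained("./mistral_lora_cpu/adapter")  # saves only the LoRA weights

demo = gr.Interface(fn=greet, inputs=gr.Number(label="n"), outputs=gr.Textbox(label="greeting"))
demo.launch()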