# Header captured from the Hugging Face Spaces file viewer (not part of the program).
# Space status: Runtime error
# File size: 3,114 Bytes
# (The viewer's per-line commit hashes and line-number gutter are omitted.)
import os
import torch
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model
from datasets import load_dataset, Dataset
from huggingface_hub import notebook_login, HfApi
# === 1️⃣ LOAD MODEL AND TOKENIZER ===
MODEL_NAME = "mistralai/Mistral-7B-v0.1"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# The Mistral tokenizer ships without a padding token. Reuse EOS so that any
# later padding (e.g. batch collation during training) does not raise.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# NOTE(review): float32 weights cost 4 bytes per parameter; for a model of this
# size that is presumably tens of GB -- confirm the target hardware has room,
# or consider a half-precision dtype.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float32).to(device)
# === 2️⃣ LoRA SETTINGS ===
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
    # Only the attention query/value projections receive adapters.
    target_modules=["q_proj", "v_proj"],
    # Declare the task so PEFT wraps the model with its causal-LM class and
    # records the task type in the saved adapter config.
    task_type="CAUSAL_LM",
)
# Wrap the base model; from here on `model` is the PEFT-wrapped model.
model = get_peft_model(model, lora_config)
# === 3️⃣ DATASET ===
DATASET_PATH = "/home/user/app/oscar_tr.parquet"
if os.path.exists(DATASET_PATH):
    # A cached subset from a previous run exists: load it instead of downloading.
    print("📂 Kaydedilmiş dataset bulundu, yükleniyor...")
    dataset = Dataset.from_parquet(DATASET_PATH)
else:
    print("🌍 Veri seti indiriliyor ve kaydediliyor...")
    streamed = load_dataset(
        "oscar",
        "unshuffled_deduplicated_tr",
        split="train",
        streaming=True,
        trust_remote_code=True,
    )
    # streaming=True yields an IterableDataset, which has neither .select()
    # nor .to_parquet(). Take a small subset from the (buffer-)shuffled stream
    # and materialise it into a regular in-memory Dataset instead.
    subset = streamed.shuffle(seed=42).take(10000)  # small subset
    dataset = Dataset.from_list(list(subset))
    # Save on the first run so later runs can skip the download.
    dataset.to_parquet(DATASET_PATH)
# === 4️⃣ TOKENIZATION ===
def tokenize_function(examples):
    """Tokenize the ``"text"`` entries of a batch, truncating to 512 tokens."""
    texts = examples["text"]
    encoded = tokenizer(texts, truncation=True, max_length=512)
    return encoded


# Apply the tokenizer to the whole dataset in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)
# === 5️⃣ TRAINING SETTINGS ===
_training_kwargs = {
    # Checkpoints are written here.
    "output_dir": "./mistral_lora_cpu",
    # 1 sample per step x 16 accumulation steps = 16 samples per optimizer update.
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 16,
    "learning_rate": 5e-4,
    "num_train_epochs": 1,
    # Checkpoint every 500 steps, keeping at most the 2 most recent.
    "save_steps": 500,
    "save_total_limit": 2,
    "logging_dir": "./logs",
    "logging_steps": 10,
    "optim": "adamw_torch",
}
training_args = TrainingArguments(**_training_kwargs)
# === 6️⃣ START TRAINING ON GPU ===
@spaces.GPU
def train_model():
    """Fine-tune the module-level ``model`` on ``tokenized_datasets`` and save it.

    Uses the module-level ``training_args`` and ``tokenizer``. After training,
    the final weights are written to ``training_args.output_dir``.

    Returns:
        str: A status message shown once training has finished.
    """
    # Local import keeps this block self-contained; transformers is already a
    # dependency of this file.
    from transformers import DataCollatorForLanguageModeling

    # The collator pads batches, which requires a padding token.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    # The tokenized dataset has no "labels" column. With mlm=False the collator
    # copies input_ids into labels, so the model can return a loss for Trainer.
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_datasets,
        data_collator=data_collator,
    )
    trainer.train()
    # Periodic checkpoints land in checkpoint-* subfolders only; write the final
    # weights to output_dir itself.
    trainer.save_model()
    return "✅ Model Eğitimi Tamamlandı!"
# === 7️⃣ UPLOAD THE MODEL TO THE HUGGING FACE HUB ===
def upload_model(repo_id="kullanici_adin/mistral-lora-modeli", folder_path="./mistral_lora_cpu"):
    """Upload a local model folder to a model repository on the Hugging Face Hub.

    Args:
        repo_id: Target repository as ``"<user>/<name>"``. The default is a
            placeholder and must be replaced with a real account/repo.
        folder_path: Local folder whose contents are uploaded.

    Returns:
        str: A status message shown once the upload has finished.
    """
    # notebook_login() only works inside Jupyter/Colab. In a script, use an
    # access token from the HF_TOKEN environment variable instead. When it is
    # unset, HfApi falls back to any locally stored credentials.
    api = HfApi(token=os.environ.get("HF_TOKEN"))
    # Create the repository if it is missing so the upload does not fail.
    api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)
    api.upload_folder(
        folder_path=folder_path,
        repo_id=repo_id,
        repo_type="model",
    )
    return "✅ Model Hugging Face Hub'a Yüklendi!"
# === 8️⃣ GRADIO INTERFACE ===
@spaces.GPU
def generate_text(prompt):
    """Generate a continuation of ``prompt`` with the module-level model.

    Args:
        prompt: Text entered by the user.

    Returns:
        str: The decoded output sequence (prompt plus up to 100 newly generated
        tokens), or an empty string when the prompt is empty or whitespace.
    """
    # Nothing to continue: skip an expensive generation call.
    if not prompt or not prompt.strip():
        return ""
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    output = model.generate(
        **inputs,
        # max_length would count the prompt tokens too, leaving no room to
        # generate for long prompts; limit only the newly generated tokens.
        max_new_tokens=100,
        # Set explicitly, since the tokenizer may not define a pad token.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)
# Text-in / text-out UI around generate_text.
iface = gr.Interface(
    fn=generate_text,
    inputs=gr.Textbox(lines=2, placeholder="Buraya bir şeyler yaz..."),
    outputs="text",
    # live=True re-runs the function on every input change, i.e. one full
    # model generation per keystroke. Run only when the user submits.
    live=False,
)
# === 9️⃣ ENTRY POINT ===
def _run_pipeline():
    """Train the model, upload it to the Hub, then serve the Gradio UI."""
    train_model()   # start training
    upload_model()  # upload the model to the Hugging Face Hub
    iface.launch()  # start the Gradio UI


if __name__ == "__main__":
    _run_pipeline()
|