kasim90 committed · verified
Commit f69cee3 · 1 Parent(s): b0fe154

Update app.py

Files changed (1)
  1. app.py +12 -75
app.py CHANGED
@@ -1,75 +1,12 @@
- import gradio as gr
- import os
- from datasets import load_dataset
- from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
- from huggingface_hub import login
- import os
- os.environ["CUDA_VISIBLE_DEVICES"] = "0"
- os.environ["TORCH_CUDA_ARCH_LIST"] = ""
-
-
- # Read the Hugging Face token from Secrets
- HF_TOKEN = os.getenv("HF_TOKEN")
- if HF_TOKEN:
-     login(HF_TOKEN)
- else:
-     raise ValueError("Hugging Face token not found! Please add it in the Secrets section.")
-
- # Load the model and tokenizer
- model_name = "mistralai/Mistral-7B-v0.1"
- tokenizer = AutoTokenizer.from_pretrained(model_name, token=HF_TOKEN)
-
- # Load the model in 32-bit precision (CPU compatible)
- model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", token=HF_TOKEN)
-
- # Function to load and clean OSCAR Turkish
- def load_and_clean_oscar():
-     dataset = load_dataset("oscar-corpus/OSCAR-2301", lang="tr", split="train")
-
-     # Data cleaning
-     def clean_text(example):
-         text = example["text"].replace("\n", " ")  # Strip line breaks
-         return {"text": text}
-
-     dataset = dataset.map(clean_text)
-     return dataset
-
- # Fine-tuning function
- def fine_tune(epochs, lr):
-     dataset = load_and_clean_oscar()
-
-     training_args = TrainingArguments(
-         output_dir="./mistral-finetuned",
-         per_device_train_batch_size=1,
-         gradient_accumulation_steps=8,
-         save_steps=1000,
-         save_total_limit=2,
-         num_train_epochs=int(epochs),
-         learning_rate=float(lr),
-         logging_dir="./logs",
-         logging_steps=50
-     )
-
-     trainer = Trainer(
-         model=model,
-         args=training_args,
-         train_dataset=dataset
-     )
-
-     trainer.train()
-     return "Fine-tuning completed!"
-
- # Gradio interface
- with gr.Blocks() as demo:
-     gr.Markdown("# Mistral Turkish Fine-Tuning")
-
-     epochs = gr.Number(value=3, label="Number of Epochs")
-     lr = gr.Textbox(value="2e-4", label="Learning Rate")
-
-     train_button = gr.Button("Start Training")
-     output_text = gr.Textbox(label="Result")
-
-     train_button.click(fine_tune, inputs=[epochs, lr], outputs=output_text)
-
- # Add the 'share=True' parameter to create a public link
- demo.launch()
 
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+
+ model_name = "mistralai/Mistral-7B-v0.1"  # An example model
+
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     device_map="auto",
+     load_in_8bit=True,  # load in 8-bit
+     torch_dtype=torch.float16
+ )
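
Note on the new loading code: passing load_in_8bit=True straight to from_pretrained requires the bitsandbytes package, and newer transformers releases prefer expressing the same setting through BitsAndBytesConfig. A minimal sketch of the equivalent call under that API (assuming bitsandbytes and accelerate are installed; mistralai/Mistral-7B-v0.1 is gated, so a Hugging Face token may also be needed):

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

model_name = "mistralai/Mistral-7B-v0.1"

# 8-bit quantization settings (needs the bitsandbytes package at runtime)
quant_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",              # let accelerate place weights on available devices
    quantization_config=quant_config,
    torch_dtype=torch.float16,      # dtype for the non-quantized parts
)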