omeryentur committed
Commit 805e37b · verified · 1 parent: f00f089

Update app.py

Files changed (1):
  app.py: +14 -26
app.py CHANGED
@@ -1,33 +1,19 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-from peft import PeftModel
-
-# Model and tokenizer names
-model_name = "google/gemma-2-2b-it"
-lora_model_name = "Anlam-Lab/gemma-2-2b-it-anlamlab-SA-Chatgpt4mini"
-
-# Configure 4-bit quantization
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.float16,
-    bnb_4bit_use_double_quant=True,
+from peft import (
+    LoraConfig,
+    PeftModel,
+    prepare_model_for_kbit_training,
+    get_peft_model,
 )
+model_name = "google/gemma-2-2b-it"
+lora_model_name="Anlam-Lab/gemma-2-2b-it-anlamlab-SA-Chatgpt4mini"
 
-# Initialize tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
 
-# Load the base model with 4-bit quantization
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    device_map="auto",
-    quantization_config=bnb_config
-)
-
-# Load the LoRA adapter
 model = PeftModel.from_pretrained(model, lora_model_name)
-
 def generate_response(input_text):
     inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
 
@@ -40,12 +26,14 @@ def generate_response(input_text):
     }
 
     with torch.no_grad():
-        outputs = model.generate(**inputs, **generation_config)
-
+        outputs = model.generate(
+            **inputs,
+            **generation_config
+        )
+
     response = tokenizer.decode(outputs[0])
     return response.split("<start_of_turn>model\n")[1].split("<end_of_turn>")[0]
 
-# Create Gradio interface
 iface = gr.Interface(
     fn=generate_response,
     inputs=gr.Textbox(lines=5, placeholder="Metninizi buraya girin..."),
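
For reference, a minimal sketch of how the updated script fits together after this commit: the base model is now loaded in float16 (the BitsAndBytesConfig 4-bit setup is removed) and the Anlam-Lab LoRA adapter is applied on top via PeftModel. The generation_config values and the output component of the Gradio interface are not visible in this diff, so the ones below are illustrative assumptions rather than the repository's actual settings.

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

model_name = "google/gemma-2-2b-it"
lora_model_name = "Anlam-Lab/gemma-2-2b-it-anlamlab-SA-Chatgpt4mini"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Base model in fp16, placed automatically across available devices.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Attach the LoRA adapter on top of the base weights.
model = PeftModel.from_pretrained(model, lora_model_name)

def generate_response(input_text):
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Assumed settings; the real generation_config in app.py is not shown in this diff.
    generation_config = {
        "max_new_tokens": 256,
        "do_sample": False,
    }

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            **generation_config
        )

    response = tokenizer.decode(outputs[0])
    return response.split("<start_of_turn>model\n")[1].split("<end_of_turn>")[0]

iface = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=5, placeholder="Metninizi buraya girin..."),
    outputs=gr.Textbox(),  # assumed output component; not part of the changed lines
)

if __name__ == "__main__":
    iface.launch()

Note that the response parsing kept by this commit assumes the prompt already follows Gemma's chat-turn format (the "<start_of_turn>model" marker), which the changed lines do not touch.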