nicolaakmal committed on
Commit
bd49b42
1 Parent(s): a72d28b

Update app.py

Files changed (1)
  1. app.py +13 -11
app.py CHANGED
@@ -1,17 +1,21 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
+from peft import PeftModel  # Make sure PEFT is installed
 
-# Load the model and tokenizer from Hugging Face Hub
-model_name = "nicolaakmal/llama32-lora-finetuned-v2"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(model_name)
+# Specify the base model and the LoRA weights
+base_model_name = "meta-llama/Llama-3.2-3B-Instruct"  # Base model as shown in the image
+lora_model_name = "nicolaakmal/llama32-lora-finetuned-v3"
 
-# Function to generate a response from the model
+# Load the tokenizer and base model
+tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
+
+# Load the LoRA model as a PeftModel
+model = PeftModel.from_pretrained(base_model, lora_model_name)
+
+# Function to generate a response
 def generate_response(prompt):
-    # Prepare the model input using the tokenizer
     inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
-
-    # Use TextStreamer to stream the output text
     text_streamer = TextStreamer(tokenizer, skip_prompt=True)
     outputs = model.generate(
         input_ids=inputs["input_ids"],
@@ -21,12 +25,10 @@ def generate_response(prompt):
         temperature=1.5,
         top_p=0.1
     )
-
-    # Generate the response from the streamer
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return response
 
-# Create Gradio interface
+# Gradio interface
 iface = gr.Interface(
     fn=generate_response,
     inputs="text",
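
A minimal sketch of exercising the new PEFT-based loading path from this commit outside Gradio. It assumes peft is installed, a CUDA GPU is available, and access to the gated meta-llama base model has been granted; max_new_tokens=64 is an illustrative value, not taken from the diff (the generation arguments between the two hunks are not shown above).

# Smoke test for the base-model + LoRA-adapter loading path introduced in this commit.
# Assumptions: peft installed, CUDA GPU present, gated base model accessible;
# max_new_tokens=64 is illustrative only.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

base_model_name = "meta-llama/Llama-3.2-3B-Instruct"
lora_model_name = "nicolaakmal/llama32-lora-finetuned-v3"

tokenizer = AutoTokenizer.from_pretrained(base_model_name)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name, torch_dtype=torch.float16
).to("cuda")  # move the base model to the GPU before generating
model = PeftModel.from_pretrained(base_model, lora_model_name)

inputs = tokenizer("Hello, how are you?", return_tensors="pt").to("cuda")
outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))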