genaforvena committed
Commit 56529ab · verified · 1 Parent(s): 33a23e4

Update app.py

Files changed (1): app.py +18 -4
app.py CHANGED
@@ -1,10 +1,24 @@
 import gradio as gr
-from transformers import pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
+
+base_model_name = "nltpt/Llama-3.2-1B-Instruct"
+base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
+
+tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+
+peft_model_path = "genaforvena/huivam_finnegan_llama3.2-1b"
+model = PeftModel.from_pretrained(base_model, peft_model_path)
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)
 
 def reply(prompt):
-    generator = pipeline("text-generation", model="genaforvena/huivam_finnegan_llama3.2-1b")
-    output = generator(prompt, max_new_tokens=150)
-    return output[0]['generated_text']
+    input_ids = tokenizer.encode(prompt, return_tensors="pt")
+    output = model.generate(input_ids, max_new_tokens=250, num_return_sequences=1)
+    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+    return generated_text
 
 demo = gr.Interface(fn=reply, inputs="text", outputs="text")
 demo.launch()
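
Note for reviewers: as committed, the new app.py calls torch.cuda.is_available() without ever importing torch, so the Space should fail with a NameError at startup, and input_ids is never moved onto the model's device, which raises a device-mismatch error whenever CUDA is available. Below is a minimal corrected sketch that keeps the commit's model and adapter IDs; the import torch line and the .to(device) call on input_ids are my additions, untested against this Space.

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load the base model and tokenizer, then attach the fine-tuned PEFT adapter.
base_model_name = "nltpt/Llama-3.2-1B-Instruct"
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

peft_model_path = "genaforvena/huivam_finnegan_llama3.2-1b"
model = PeftModel.from_pretrained(base_model, peft_model_path)

# Missing in the commit: torch must be imported before this line runs.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

def reply(prompt):
    # Move the inputs onto the same device as the model (also missing in the commit).
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    output = model.generate(input_ids, max_new_tokens=250, num_return_sequences=1)
    return tokenizer.decode(output[0], skip_special_tokens=True)

demo = gr.Interface(fn=reply, inputs="text", outputs="text")
demo.launch()

Loading the model once at module scope, as the commit does, is the right call for a Gradio Space; the old version rebuilt the pipeline inside reply(), reloading the weights on every prompt.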