Update app.py
Browse files
app.py
CHANGED
@@ -1,10 +1,24 @@
|
|
1 |
import gradio as gr
|
2 |
-
from transformers import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
def reply(prompt):
|
5 |
-
|
6 |
-
output =
|
7 |
-
|
|
|
|
|
8 |
|
9 |
demo = gr.Interface(fn=reply, inputs="text", outputs="text")
|
10 |
demo.launch()
|
|
|
1 |
import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
|
4 |
+
|
5 |
+
# Identifiers passed to from_pretrained() for the base checkpoint and the
# PEFT adapter that is applied on top of it.
base_model_name = "nltpt/Llama-3.2-1B-Instruct"
peft_model_path = "genaforvena/huivam_finnegan_llama3.2-1b"

# Load the base causal LM and its tokenizer from the same checkpoint name.
base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Wrap the base model with the adapter weights.
model = PeftModel.from_pretrained(base_model, peft_model_path)

# Run on the GPU when one is visible to torch, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model.to(device)
|
15 |
|
16 |
def reply(prompt):
    """Generate text for *prompt* with the module-level PEFT model.

    Args:
        prompt: Text submitted through the Gradio interface.

    Returns:
        The first generated sequence, decoded to a string with special
        tokens stripped.
    """
    # The model is moved to `device` at import time; the encoded prompt must
    # live on the same device or generate() fails with a device mismatch
    # whenever CUDA is in use.
    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    # Pass input_ids by keyword so the call does not depend on the wrapper's
    # generate() accepting positional arguments.
    output = model.generate(
        input_ids=input_ids,
        max_new_tokens=250,
        num_return_sequences=1,
    )

    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
    return generated_text
|
22 |
|
23 |
# Single text box in, single text box out; every submission is routed to reply().
demo = gr.Interface(
    fn=reply,
    inputs="text",
    outputs="text",
)
demo.launch()
|