daresearch committed
Commit a3b0d76 · verified · 1 parent: 47c35cd

Update app.py

Files changed (1)
  1. app.py +33 -15
app.py CHANGED
@@ -1,31 +1,49 @@
-import gradio as gr
-import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from peft import PeftModel
+import gradio as gr

-BASE_MODEL = "meta-llama/Llama-3.3-70B-Instruct"
-ADAPTER = "daresearch/Llama-3.3-70B-ft-exec-roles"
+# Step 1: Load the base model
+base_model_name = "meta-llama/Llama-3.3-70B-Instruct"
+adapter_repo = "daresearch/Llama-3.3-70B-ft-exec-roles"

-# Load the base model
-base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, torch_dtype=torch.float16, device_map="auto")
-tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+# Load the base model (LlamaForCausalLM)
+base_model = AutoModelForCausalLM.from_pretrained(
+    base_model_name,
+    device_map="auto",          # Distribute model across GPUs (if available)
+    torch_dtype=torch.float16,  # Use FP16 precision to save memory
+)

-# Load the adapter
-model = PeftModel.from_pretrained(base_model, ADAPTER, device_map="auto")
+# Load the LoRA adapter into the base model
+model_with_adapter = PeftModel.from_pretrained(
+    base_model,
+    adapter_repo,
+    device_map="auto",
+)

-# Create a text generation pipeline
-pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+# Extract the underlying base model for compatibility with pipelines
+underlying_model = model_with_adapter.base_model

-def infer(prompt):
-    outputs = pipe(prompt, max_length=128)
+# Load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(base_model_name)
+
+# Create the text generation pipeline
+pipe = pipeline("text-generation", model=underlying_model, tokenizer=tokenizer)
+
+# Define the Gradio interface function
+def generate_text(prompt):
+    # Use the pipeline to generate text
+    outputs = pipe(prompt, max_length=200)
     return outputs[0]["generated_text"]

+# Create the Gradio interface
 iface = gr.Interface(
-    fn=infer,
+    fn=generate_text,
     inputs="text",
     outputs="text",
-    title="LoRA-Enhanced Model"
+    title="LoRA-Enhanced LLaMA Text Generator",
+    description="Provide a prompt, and the model will generate a response."
 )

+# Launch the app
 if __name__ == "__main__":
     iface.launch()
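
Note on the updated file: the new app.py still passes torch_dtype=torch.float16 to from_pretrained, but this commit also drops the import torch line, so the app would stop at startup with a NameError. A minimal follow-up sketch (not part of this commit) that restores the import:

@@ -1,3 +1,4 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from peft import PeftModel
 import gradio as gr
+import torch

Separately, recent transformers releases with PEFT integration generally accept a PeftModel directly as the model argument of pipeline(...), so passing model_with_adapter rather than model_with_adapter.base_model should also keep the LoRA weights active during generation; worth verifying against the pinned transformers and peft versions in this Space.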