daresearch committed
Commit 4715b88 · verified · 1 Parent(s): 5824d58

Update app.py

Files changed (1)
  app.py +6 -6
app.py CHANGED
@@ -1,4 +1,4 @@
-import torch  # Import torch for specifying data types and other functionalities
+import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from peft import PeftModel
 import gradio as gr
@@ -14,20 +14,20 @@ base_model = AutoModelForCausalLM.from_pretrained(
     torch_dtype=torch.float16,  # Use FP16 precision to save memory
 )
 
-# Load the LoRA adapter into the base model
+# Step 2: Load the LoRA adapter into the base model
 model_with_adapter = PeftModel.from_pretrained(
     base_model,
     adapter_repo,
     device_map="auto",
 )
 
-# Extract the underlying base model for compatibility with pipelines
-underlying_model = model_with_adapter.base_model
+# Step 3: Extract the underlying base model from the LoRA wrapper
+underlying_model = model_with_adapter.merge_and_unload()  # Merges LoRA weights into base model
 
-# Load the tokenizer
+# Step 4: Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 
-# Create the text generation pipeline
+# Step 5: Create the text generation pipeline using the underlying base model
 pipe = pipeline("text-generation", model=underlying_model, tokenizer=tokenizer)
 
 # Define the Gradio interface function
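
For context, the substantive change in this commit is how `underlying_model` is produced: instead of reaching into `model_with_adapter.base_model`, the app now calls `merge_and_unload()`, which folds the LoRA deltas into the base weights and strips the PEFT wrapper, returning a plain transformers model that the text-generation pipeline can consume directly. The sketch below reconstructs the resulting loading flow under that reading; the repository IDs are placeholders, since the real `base_model_name` and `adapter_repo` values are defined earlier in app.py and are not part of this diff.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel

base_model_name = "org/base-model"   # placeholder; real value is set earlier in app.py
adapter_repo = "org/lora-adapter"    # placeholder; real value is set earlier in app.py

# Load the base model in FP16 (matches the torch_dtype shown in the diff context)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
)

# Attach the LoRA adapter, then merge it: merge_and_unload() folds the adapter
# weights into the base model and returns the merged transformers model
model_with_adapter = PeftModel.from_pretrained(base_model, adapter_repo, device_map="auto")
underlying_model = model_with_adapter.merge_and_unload()

tokenizer = AutoTokenizer.from_pretrained(base_model_name)
pipe = pipeline("text-generation", model=underlying_model, tokenizer=tokenizer)

print(pipe("Hello!", max_new_tokens=32)[0]["generated_text"])

One trade-off worth noting: merging is a one-way operation in memory, so the merged model can no longer toggle the adapter off, but it avoids handing a PeftModel wrapper to pipeline(), which is what the earlier `.base_model` access appeared to be aimed at per its original comment.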