daresearch committed
Commit 4715b88 · verified · 1 Parent(s): 5824d58

Update app.py

Files changed (1)
  app.py +6 -6
app.py CHANGED
@@ -1,4 +1,4 @@
-import torch  # Import torch for specifying data types and other functionalities
+import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 from peft import PeftModel
 import gradio as gr
@@ -14,20 +14,20 @@ base_model = AutoModelForCausalLM.from_pretrained(
     torch_dtype=torch.float16,  # Use FP16 precision to save memory
 )
 
-# Load the LoRA adapter into the base model
+# Step 2: Load the LoRA adapter into the base model
 model_with_adapter = PeftModel.from_pretrained(
     base_model,
     adapter_repo,
     device_map="auto",
 )
 
-# Extract the underlying base model for compatibility with pipelines
-underlying_model = model_with_adapter.base_model
+# Step 3: Extract the underlying base model from the LoRA wrapper
+underlying_model = model_with_adapter.merge_and_unload()  # Merges LoRA weights into base model
 
-# Load the tokenizer
+# Step 4: Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(base_model_name)
 
-# Create the text generation pipeline
+# Step 5: Create the text generation pipeline using the underlying base model
 pipe = pipeline("text-generation", model=underlying_model, tokenizer=tokenizer)
 
 # Define the Gradio interface function
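
For context, the substantive change in this commit is how `underlying_model` is produced: instead of reaching into `model_with_adapter.base_model`, the app now calls `merge_and_unload()`, which folds the LoRA deltas into the base weights and strips the PEFT wrapper, returning a plain transformers model that the text-generation pipeline can consume directly. The sketch below reconstructs the resulting loading flow under that reading; the repository IDs are placeholders, since the real `base_model_name` and `adapter_repo` values are defined earlier in app.py and are not part of this diff.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from peft import PeftModel

base_model_name = "org/base-model"   # placeholder; real value is set earlier in app.py
adapter_repo = "org/lora-adapter"    # placeholder; real value is set earlier in app.py

# Load the base model in FP16 (matches the torch_dtype shown in the diff context)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
)

# Attach the LoRA adapter, then merge it: merge_and_unload() folds the adapter
# weights into the base model and returns the merged transformers model
model_with_adapter = PeftModel.from_pretrained(base_model, adapter_repo, device_map="auto")
underlying_model = model_with_adapter.merge_and_unload()

tokenizer = AutoTokenizer.from_pretrained(base_model_name)
pipe = pipeline("text-generation", model=underlying_model, tokenizer=tokenizer)

print(pipe("Hello!", max_new_tokens=32)[0]["generated_text"])

One trade-off worth noting: merging is a one-way operation in memory, so the merged model can no longer toggle the adapter off, but it avoids handing a PeftModel wrapper to pipeline(), which is what the earlier `.base_model` access appeared to be aimed at per its original comment.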