desert committed on
Commit 21886ee · 1 Parent(s): f84cd21
Files changed (1)
  1. app.py +7 -10
app.py CHANGED
@@ -2,20 +2,17 @@ import gradio as gr
 from unsloth import FastLanguageModel
 import torch
 
+max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
+dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
+load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.
 
-# Load your model and tokenizer (make sure to adjust the path to where your model is stored)
-max_seq_length = 2048 # Adjust as necessary
-load_in_4bit = True # Enable 4-bit quantization for reduced memory usage
-model_path = "llama_lora_model_1" # Path to your custom model
-
-# Load the model and tokenizer
 model, tokenizer = FastLanguageModel.from_pretrained(
-    model_name=model_path,
-    max_seq_length=max_seq_length,
-    load_in_4bit=load_in_4bit,
+    model_name = "llama_lora_model_1",
+    max_seq_length = max_seq_length,
+    dtype = dtype,
+    load_in_4bit = load_in_4bit,
 )
 
-
 # Respond function
 def respond(
     message,
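
For context, a minimal sketch of how the model loaded in this commit might be used for generation. The prompt, the generation parameters, and the FastLanguageModel.for_inference call are illustrative assumptions, not part of this commit:

# Minimal usage sketch (assumed; not part of this commit).
from unsloth import FastLanguageModel

max_seq_length = 2048   # matches the value set in app.py
dtype = None            # auto-detect: float16 on T4/V100, bfloat16 on Ampere+
load_in_4bit = True     # 4-bit quantization to reduce memory usage

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "llama_lora_model_1",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

FastLanguageModel.for_inference(model)  # switch unsloth to its faster inference mode

inputs = tokenizer("Hello, how are you?", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))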