FlawedLLM committed (verified)
Commit bb033c5 · 1 Parent(s): 8a99e10

Update app.py

Files changed (1)
  app.py +21 -12
app.py CHANGED
@@ -7,19 +7,28 @@ import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 from huggingface_hub import login, HfFolder
-tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged16bit_clean_final", trust_remote_code=True)
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.float16)
-model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_gemma_merged16bit_clean_final",
-    device_map="auto",
-    quantization_config=quantization_config,
-    torch_dtype=torch.float16,
-    low_cpu_mem_usage=True,
-    trust_remote_code=True)
+# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini_gemma_merged16bit_clean_final", trust_remote_code=True)
+# quantization_config = BitsAndBytesConfig(
+#     load_in_4bit=True,
+#     bnb_4bit_use_double_quant=True,
+#     bnb_4bit_quant_type="nf4",
+#     bnb_4bit_compute_dtype=torch.float16)
+# model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_gemma_merged16bit_clean_final",
+#     device_map="auto",
+#     quantization_config=quantization_config,
+#     torch_dtype=torch.float16,
+#     low_cpu_mem_usage=True,
+#     trust_remote_code=True)
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
 
+# 1. Load Your Base Model and LoRA Adapter
+model_name_or_path = "FlawedLLM/Bhashini_gemma_merged4bit_clean_final"  # Hugging Face model or local path
+lora_weights = "FlawedLLM/Bhashini_gemma_lora_clean_final"  # LoRA weights
+
+tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
+model = AutoModelForCausalLM.from_pretrained(model_name_or_path, load_in_8bit=True, device_map='auto')
+model = PeftModel.from_pretrained(model, lora_weights)
 # alpaca_prompt = You MUST copy from above!
 @spaces.GPU(duration=300)
 def chunk_it(input_command, item_list):
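
Note on the new loading code: recent transformers releases deprecate passing the bare load_in_8bit=True kwarg to from_pretrained in favor of an explicit BitsAndBytesConfig, the same mechanism the removed 4-bit code used. A minimal, forward-compatible sketch of an equivalent load follows; the generate call and its prompt string are illustrative placeholders, not the app's real alpaca_prompt template:

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

# Same repos as in the diff above.
base_repo = "FlawedLLM/Bhashini_gemma_merged4bit_clean_final"
lora_repo = "FlawedLLM/Bhashini_gemma_lora_clean_final"

# 8-bit quantization expressed through BitsAndBytesConfig instead of the
# deprecated bare load_in_8bit kwarg.
bnb_config = BitsAndBytesConfig(load_in_8bit=True)

tokenizer = AutoTokenizer.from_pretrained(base_repo)
model = AutoModelForCausalLM.from_pretrained(
    base_repo,
    quantization_config=bnb_config,
    device_map="auto",
)
# Attach the LoRA adapter on top of the quantized base model.
model = PeftModel.from_pretrained(model, lora_repo)

# Placeholder prompt; the app's real prompt template is not in this hunk.
inputs = tokenizer("List the items: apples, bananas", return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))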