FlawedLLM committed
Commit 6f18f2a · verified · 1 Parent(s): b9bdc7e

Update app.py

Files changed (1):
  app.py +22 -14
app.py CHANGED
@@ -64,20 +64,28 @@ from huggingface_hub import login, HfFolder
 # model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini_9", config=config, ignore_mismatched_sizes=True).to('cuda')
 # Load model directly
 
-tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini89", trust_remote_code=True)
-quantization_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_use_double_quant=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.float16)
-model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini89",
-    device_map="auto",
-    quantization_config=quantization_config,
-    torch_dtype=torch.float16,
-    low_cpu_mem_usage=True,
-    use_safetensors=True,
-    trust_remote_code=True)
-
+# tokenizer = AutoTokenizer.from_pretrained("FlawedLLM/Bhashini89", trust_remote_code=True)
+# quantization_config = BitsAndBytesConfig(
+#     load_in_4bit=True,
+#     bnb_4bit_use_double_quant=True,
+#     bnb_4bit_quant_type="nf4",
+#     bnb_4bit_compute_dtype=torch.float16)
+# model = AutoModelForCausalLM.from_pretrained("FlawedLLM/Bhashini89",
+#     device_map="auto",
+#     quantization_config=quantization_config,
+#     torch_dtype=torch.float16,
+#     low_cpu_mem_usage=True,
+#     use_safetensors=True,
+#     trust_remote_code=True)
+from unsloth import FastLanguageModel
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name = "FlawedLLM/Bhashini_gemma_lora_clean_final",  # the model used for training
+    max_seq_length = max_seq_length,
+    dtype = dtype,
+    load_in_4bit = load_in_4bit,)
+FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
+
+# alpaca_prompt = You MUST copy from above!
 @spaces.GPU(duration=300)
 def chunk_it(input_command, item_list):
     alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.