keerthanaBasavaraj committed on
Commit
749ca16
·
1 Parent(s): f30660e

add cpu bitsandbytes

Browse files
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  streamlit
2
  transformers
3
- bitsandbytes
 
4
  accelerate
 
1
  streamlit
2
  transformers
3
+ bitsandbytes-cpu
4
+
5
  accelerate
sql_query_generator/generator.py CHANGED
@@ -1,16 +1,15 @@
1
- from transformers import AutoTokenizer, AutoModelForCausalLM
2
 
3
  def load_model(model_name="chatdb/natural-sql-7b"):
4
  """
5
- Loads the model on CPU and avoids bitsandbytes.
6
  """
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
- model = AutoModelForCausalLM.from_pretrained(
9
- model_name,
10
- device_map="auto", # Auto-map to CPU
11
- offload_folder="offload", # Offload to disk
12
- low_cpu_mem_usage=True, # Optimize CPU memory usage
13
  )
 
14
  return tokenizer, model
15
 
16
  def generate_sql(question, prompt_inputs, tokenizer, model, device="cpu"):
 
1
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
2
 
3
def load_model(model_name="chatdb/natural-sql-7b"):
    """
    Load a causal-LM SQL generator and its tokenizer with 8-bit quantization.

    Parameters
    ----------
    model_name : str, optional
        Hugging Face model id to load (default: "chatdb/natural-sql-7b").

    Returns
    -------
    tuple
        (tokenizer, model) — the AutoTokenizer and the quantized
        AutoModelForCausalLM, ready for generation.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # NOTE(review): load_in_8bit relies on bitsandbytes, which historically
    # requires a CUDA GPU; this commit swaps requirements to the
    # "bitsandbytes-cpu" package — confirm it actually supports CPU-only
    # 8-bit inference before deploying.
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=True,        # enable 8-bit weight loading
        llm_int8_threshold=6.0,   # outlier threshold for int8 matmul; tune if needed
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=quantization_config,
    )
    return tokenizer, model
14
 
15
  def generate_sql(question, prompt_inputs, tokenizer, model, device="cpu"):