GuhanAein committed
Commit 98e2f27 · verified · 1 Parent(s): 0aaba4f

Update main.py

Files changed (1):
  main.py +6 -12
main.py CHANGED
@@ -1,5 +1,5 @@
 from fastapi import FastAPI
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 from datasets import load_dataset
 from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
@@ -43,7 +43,7 @@ login(hf_token)
 # Load Dataset and Prepare Knowledge Base
 ds = load_dataset("codeparrot/apps", "all", split="train")
 os.makedirs("knowledge_base", exist_ok=True)
-for i, example in enumerate(ds.select(range(100))):
+for i, example in enumerate(ds.select(range(50))):  # Reduced to 50 for memory
     solution = example['solutions'][0] if example['solutions'] else "No solution available"
     with open(f"knowledge_base/doc_{i}.txt", "w", encoding="utf-8") as f:
         f.write(f"### Problem\n{example['question']}\n\n### Solution\n{solution}")
@@ -57,20 +57,14 @@ faiss_index = faiss.IndexFlatL2(d)
 vector_store = FaissVectorStore(faiss_index=faiss_index)
 index = VectorStoreIndex.from_documents(documents, vector_store=vector_store)
 
-# Load LLaMA Model
+# Load LLaMA Model (without quantization, on CPU)
 model_name = "meta-llama/Llama-3.2-1B-Instruct"
-quant_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.float16,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_use_double_quant=True
-)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-device = "cuda" if torch.cuda.is_available() else "cpu"
+device = "cpu"  # Force CPU usage
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    quantization_config=quant_config,
-    device_map="auto" if device == "cuda" else None
+    device_map="cpu",  # Explicitly map to CPU
+    torch_dtype=torch.float32  # Use float32 for CPU compatibility
 )
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
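
For reference, a minimal sketch (not part of this commit) of how the CPU-only loading path above might be exercised. It assumes the tokenizer, model, and device variables from the updated main.py; the prompt string is purely illustrative:

# Illustrative sketch, not part of this commit: generate from the CPU-loaded model.
# Assumes `tokenizer`, `model`, and `device` ("cpu") as defined in the updated main.py.
prompt = "### Problem\nReverse a string.\n\n### Solution\n"  # hypothetical prompt
inputs = tokenizer(prompt, return_tensors="pt").to(device)
with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=64,
        pad_token_id=tokenizer.pad_token_id,  # pad token was set to eos above
    )
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))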