Update main.py
main.py CHANGED
@@ -1,5 +1,5 @@
 from fastapi import FastAPI
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 from datasets import load_dataset
 from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, Settings
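The import dropped here matches the third hunk below: the 4-bit BitsAndBytesConfig path needs the bitsandbytes CUDA kernels, so it goes away together with the quantized load. For reference, a minimal sketch of the GPU-only setup this commit removes; the config values are verbatim from the old code, but the exact keyword arguments the old from_pretrained call passed are truncated in this diff view, so quantization_config and device_map below are assumptions:

# Sketch of the quantized GPU load removed by this commit
# (assumes a CUDA device and the bitsandbytes package installed;
# quantization_config= and device_map= are assumed, not shown in the diff).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "meta-llama/Llama-3.2-1B-Instruct"
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # store weights in 4-bit
    bnb_4bit_compute_dtype=torch.float16,  # run compute in fp16
    bnb_4bit_quant_type="nf4",             # NormalFloat4 quantization
    bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quant_config,
    device_map="auto",                     # place layers on available GPUs
)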
@@ -43,7 +43,7 @@ login(hf_token)
 # Load Dataset and Prepare Knowledge Base
 ds = load_dataset("codeparrot/apps", "all", split="train")
 os.makedirs("knowledge_base", exist_ok=True)
-for i, example in enumerate(ds.select(range(
+for i, example in enumerate(ds.select(range(50))): # Reduced to 50 for memory
     solution = example['solutions'][0] if example['solutions'] else "No solution available"
     with open(f"knowledge_base/doc_{i}.txt", "w", encoding="utf-8") as f:
         f.write(f"### Problem\n{example['question']}\n\n### Solution\n{solution}")
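Note that ds.select(range(50)) still downloads and caches the full train split before slicing it; if disk or memory is the real constraint, streaming mode avoids materializing the dataset at all. A possible alternative sketch, not what the commit does:

# Alternative sketch: stream the split and take only the first 50 examples,
# avoiding a full download of codeparrot/apps (assumption: streaming suits
# this Space; the commit itself keeps ds.select(range(50))).
import os
from datasets import load_dataset

ds_stream = load_dataset("codeparrot/apps", "all", split="train", streaming=True)
os.makedirs("knowledge_base", exist_ok=True)
for i, example in enumerate(ds_stream.take(50)):
    solution = example["solutions"][0] if example["solutions"] else "No solution available"
    with open(f"knowledge_base/doc_{i}.txt", "w", encoding="utf-8") as f:
        f.write(f"### Problem\n{example['question']}\n\n### Solution\n{solution}")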
@@ -57,20 +57,14 @@ faiss_index = faiss.IndexFlatL2(d)
 vector_store = FaissVectorStore(faiss_index=faiss_index)
 index = VectorStoreIndex.from_documents(documents, vector_store=vector_store)
 
-# Load LLaMA Model
+# Load LLaMA Model (without quantization, on CPU)
 model_name = "meta-llama/Llama-3.2-1B-Instruct"
-quant_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.float16,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_use_double_quant=True
-)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-device = "
+device = "cpu" # Force CPU usage
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-
-
+    device_map="cpu", # Explicitly map to CPU
+    torch_dtype=torch.float32 # Use float32 for CPU compatibility
 )
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
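For context around lines 57-58: the knowledge_base files written in the previous hunk are presumably read with SimpleDirectoryReader and embedded into the FAISS store. A minimal sketch of that path, assuming Settings.embed_model is configured elsewhere in main.py and that d matches its output dimension (neither appears in this diff); the from_documents call mirrors the diff as written, though current llama_index versions typically wire the store through a StorageContext instead:

# Minimal indexing sketch (assumptions: Settings.embed_model is set
# elsewhere; d = 384 is a hypothetical embedding dimension).
import faiss
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.faiss import FaissVectorStore

documents = SimpleDirectoryReader("knowledge_base").load_data()
d = 384                             # must match the embedding model's dimension
faiss_index = faiss.IndexFlatL2(d)  # exact L2 nearest-neighbor index
vector_store = FaissVectorStore(faiss_index=faiss_index)
index = VectorStoreIndex.from_documents(documents, vector_store=vector_store)

# Retrieval example: top-k similar problem/solution documents for a query.
retriever = index.as_retriever(similarity_top_k=3)
nodes = retriever.retrieve("dynamic programming problem about subsequences")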
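With the model forced onto the CPU in float32, generation works the same as on GPU, just slower. A minimal usage sketch with the model, tokenizer, and device defined above; the prompt and generation parameters are illustrative, not from the commit:

# Minimal inference sketch for the CPU-loaded model
# (parameter values are illustrative).
prompt = "Write a Python function that reverses a string."
inputs = tokenizer(prompt, return_tensors="pt").to(device)  # device == "cpu"
outputs = model.generate(
    **inputs,
    max_new_tokens=128,
    do_sample=False,                      # greedy decoding keeps CPU runs fast
    pad_token_id=tokenizer.pad_token_id,  # set to eos_token above when missing
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))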