Update main.py
main.py
CHANGED
@@ -2,12 +2,15 @@ from fastapi import FastAPI
 from pydantic import BaseModel, Field
 from typing import List, Dict
 from llama_cpp import Llama
+import os
+os.environ["HF_HOME"] = "./cache"
 
 # Load the Llama model with the specified path and configuration
 llm = Llama.from_pretrained(
     repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",  # Replace with the actual model repository ID
     filename="Llama-3.2-3B-Instruct-Q8_0.gguf",  # Replace with your actual model filename if necessary
     n_ctx=4096,
+    cache_dir="./cache",
     n_threads=2,
 )
 
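The change points the Hugging Face Hub download cache at a local ./cache directory, via the HF_HOME environment variable and the cache_dir argument to Llama.from_pretrained, which is a common workaround when the default cache path is not writable. For context, here is a minimal sketch of how the rest of main.py might serve this llm instance; the endpoint name, request model, and generation parameters below are assumptions for illustration, not part of this commit:

# Hypothetical continuation of main.py (illustrative only); `llm` is the
# Llama instance created above, with its GGUF file cached under ./cache.
from fastapi import FastAPI
from pydantic import BaseModel, Field

app = FastAPI()

class ChatRequest(BaseModel):
    prompt: str = Field(..., description="User prompt sent to the model")
    max_tokens: int = Field(256, ge=1, le=1024)

@app.post("/chat")
def chat(req: ChatRequest):
    # create_chat_completion is llama-cpp-python's chat API; it returns a dict
    # shaped like an OpenAI chat completion response.
    result = llm.create_chat_completion(
        messages=[{"role": "user", "content": req.prompt}],
        max_tokens=req.max_tokens,
    )
    return {"response": result["choices"][0]["message"]["content"]}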