dharmendra committed
Commit · 81d2ef5
1 Parent(s): 0e58b11
Updated app.py with explicit Hugging Face login and removed model.to(device)
app.py
CHANGED
@@ -12,7 +12,6 @@ import asyncio
 import json
 from langchain_community.llms import HuggingFacePipeline
 import uvicorn
-# Import the login function from huggingface_hub
 from huggingface_hub import login
 
 app = FastAPI()
@@ -23,39 +22,36 @@ HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
 if HUGGINGFACEHUB_API_TOKEN is None:
     raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable not set.")
 
-# ---
-# This ensures the environment is authenticated before trying to load models.
+# --- Explicitly log in to Hugging Face Hub ---
 try:
     login(token=HUGGINGFACEHUB_API_TOKEN)
     print("Successfully logged into Hugging Face Hub.")
 except Exception as e:
     print(f"Failed to log into Hugging Face Hub: {e}")
-    #
-    # depending on whether you want the app to start without model access.
-    # For now, we'll let the subsequent model loading attempt to fail if it must.
+    # The app will likely fail to load the model if login fails, so this print is for debugging.
 
-
-# --- UPDATED: Use Mistral 7B Instruct v0.3 model ---
+# --- Use Mistral 7B Instruct v0.3 model ---
 model_id = "mistralai/Mistral-7B-Instruct-v0.3"
 
-# --- IMPORTANT FIX: Pass token to tokenizer as well ---
 tokenizer = AutoTokenizer.from_pretrained(model_id, token=HUGGINGFACEHUB_API_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    device_map="auto",
+    device_map="auto",  # 'auto' handles device placement, including offloading
     torch_dtype=torch.bfloat16,
     trust_remote_code=True,
-    token=HUGGINGFACEHUB_API_TOKEN
+    token=HUGGINGFACEHUB_API_TOKEN
 )
 
-
-
-
-
-
-
-
-
+# --- REMOVED: model.to(device) ---
+# When device_map="auto" is used, accelerate handles device placement.
+# Manually moving the model can cause conflicts and RuntimeErrors.
+# if torch.backends.mps.is_available():
+#     device = "mps"
+# elif torch.cuda.is_available():
+#     device = "cuda"
+# else:
+#     device = "cpu"
+# model.to(device)  # This line is removed
 
 # k=5 means it will keep the last 5 human-AI interaction pairs (10 messages total)
 memory = ConversationBufferWindowMemory(k=5)
@@ -65,7 +61,7 @@ llm = HuggingFacePipeline(pipeline=pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
-    max_new_tokens=512,
+    max_new_tokens=512,
     return_full_text=True,
     temperature=0.2,
     do_sample=True,
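For reference, the changed section of app.py after this commit looks roughly like the sketch below, consolidated from the hunks above. The diff only shows the modified lines, so the import locations (FastAPI, torch, the transformers classes, and ConversationBufferWindowMemory) are assumptions inferred from the identifiers used, not lines present in this commit.

# Consolidated sketch of the post-commit state; imports are assumed.
import os

import torch
from fastapi import FastAPI
from huggingface_hub import login
from langchain.memory import ConversationBufferWindowMemory
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

app = FastAPI()

HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
if HUGGINGFACEHUB_API_TOKEN is None:
    raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable not set.")

# Authenticate explicitly before loading the gated Mistral weights.
try:
    login(token=HUGGINGFACEHUB_API_TOKEN)
    print("Successfully logged into Hugging Face Hub.")
except Exception as e:
    print(f"Failed to log into Hugging Face Hub: {e}")

model_id = "mistralai/Mistral-7B-Instruct-v0.3"

tokenizer = AutoTokenizer.from_pretrained(model_id, token=HUGGINGFACEHUB_API_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",          # accelerate places (and offloads) layers itself
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    token=HUGGINGFACEHUB_API_TOKEN,
)
# Note: no model.to(device) here; see the comment block in the diff above.

# k=5 keeps the last 5 human-AI interaction pairs (10 messages total).
memory = ConversationBufferWindowMemory(k=5)

llm = HuggingFacePipeline(pipeline=pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    return_full_text=True,
    temperature=0.2,
    do_sample=True,
))

The key behavioral change is that device placement is left entirely to accelerate: with device_map="auto" the weights may be sharded or offloaded across devices, so a blanket model.to(device) can no longer be applied safely and is removed.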