dharmendra committed
Commit · 81d2ef5
1 Parent(s): 0e58b11
Updated app.py with explicit Hugging Face login and removed model.to(device)
app.py
CHANGED
@@ -12,7 +12,6 @@ import asyncio
 import json
 from langchain_community.llms import HuggingFacePipeline
 import uvicorn
-# Import the login function from huggingface_hub
 from huggingface_hub import login
 
 app = FastAPI()
@@ -23,39 +22,36 @@ HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
 if HUGGINGFACEHUB_API_TOKEN is None:
     raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable not set.")
 
-# ---
-# This ensures the environment is authenticated before trying to load models.
+# --- Explicitly log in to Hugging Face Hub ---
 try:
     login(token=HUGGINGFACEHUB_API_TOKEN)
     print("Successfully logged into Hugging Face Hub.")
 except Exception as e:
     print(f"Failed to log into Hugging Face Hub: {e}")
-    #
-    # depending on whether you want the app to start without model access.
-    # For now, we'll let the subsequent model loading attempt to fail if it must.
+    # The app will likely fail to load the model if login fails, so this print is for debugging.
 
-
-# --- UPDATED: Use Mistral 7B Instruct v0.3 model ---
+# --- Use Mistral 7B Instruct v0.3 model ---
 model_id = "mistralai/Mistral-7B-Instruct-v0.3"
 
-# --- IMPORTANT FIX: Pass token to tokenizer as well ---
 tokenizer = AutoTokenizer.from_pretrained(model_id, token=HUGGINGFACEHUB_API_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    device_map="auto",
+    device_map="auto",  # 'auto' handles device placement, including offloading
     torch_dtype=torch.bfloat16,
     trust_remote_code=True,
-    token=HUGGINGFACEHUB_API_TOKEN
+    token=HUGGINGFACEHUB_API_TOKEN
 )
 
-
-
-
-
-
-
-
-
+# --- REMOVED: model.to(device) ---
+# When device_map="auto" is used, accelerate handles device placement.
+# Manually moving the model can cause conflicts and RuntimeErrors.
+# if torch.backends.mps.is_available():
+#     device = "mps"
+# elif torch.cuda.is_available():
+#     device = "cuda"
+# else:
+#     device = "cpu"
+# model.to(device)  # This line is removed
 
 # k=5 means it will keep the last 5 human-AI interaction pairs (10 messages total)
 memory = ConversationBufferWindowMemory(k=5)
@@ -65,7 +61,7 @@ llm = HuggingFacePipeline(pipeline=pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
-    max_new_tokens=512,
+    max_new_tokens=512,
     return_full_text=True,
     temperature=0.2,
     do_sample=True,
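For reference, the changed section of app.py after this commit looks roughly like the sketch below, consolidated from the hunks above. The diff only shows the modified lines, so the import locations (FastAPI, torch, the transformers classes, and ConversationBufferWindowMemory) are assumptions inferred from the identifiers used, not lines present in this commit.

# Consolidated sketch of the post-commit state; imports are assumed.
import os

import torch
from fastapi import FastAPI
from huggingface_hub import login
from langchain.memory import ConversationBufferWindowMemory
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

app = FastAPI()

HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
if HUGGINGFACEHUB_API_TOKEN is None:
    raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable not set.")

# Authenticate explicitly before loading the gated Mistral weights.
try:
    login(token=HUGGINGFACEHUB_API_TOKEN)
    print("Successfully logged into Hugging Face Hub.")
except Exception as e:
    print(f"Failed to log into Hugging Face Hub: {e}")

model_id = "mistralai/Mistral-7B-Instruct-v0.3"

tokenizer = AutoTokenizer.from_pretrained(model_id, token=HUGGINGFACEHUB_API_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",          # accelerate places (and offloads) layers itself
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    token=HUGGINGFACEHUB_API_TOKEN,
)
# Note: no model.to(device) here; see the comment block in the diff above.

# k=5 keeps the last 5 human-AI interaction pairs (10 messages total).
memory = ConversationBufferWindowMemory(k=5)

llm = HuggingFacePipeline(pipeline=pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    return_full_text=True,
    temperature=0.2,
    do_sample=True,
))

The key behavioral change is that device placement is left entirely to accelerate: with device_map="auto" the weights may be sharded or offloaded across devices, so a blanket model.to(device) can no longer be applied safely and is removed.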