dharmendra committed on
Commit 81d2ef5 · 1 Parent(s): 0e58b11

Updated app.py with explicit Hugging Face login and removed model.to(device)

Files changed (1)
  1. app.py +16 -20
app.py CHANGED
@@ -12,7 +12,6 @@ import asyncio
 import json
 from langchain_community.llms import HuggingFacePipeline
 import uvicorn
-# Import the login function from huggingface_hub
 from huggingface_hub import login
 
 app = FastAPI()
@@ -23,39 +22,36 @@ HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
 if HUGGINGFACEHUB_API_TOKEN is None:
     raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable not set.")
 
-# --- NEW: Explicitly log in to Hugging Face Hub ---
-# This ensures the environment is authenticated before trying to load models.
+# --- Explicitly log in to Hugging Face Hub ---
 try:
     login(token=HUGGINGFACEHUB_API_TOKEN)
     print("Successfully logged into Hugging Face Hub.")
 except Exception as e:
     print(f"Failed to log into Hugging Face Hub: {e}")
-    # You might want to raise an HTTPException here or handle this more gracefully
-    # depending on whether you want the app to start without model access.
-    # For now, we'll let the subsequent model loading attempt to fail if it must.
+    # The app will likely fail to load the model if login fails, so this print is for debugging.
 
-
-# --- UPDATED: Use Mistral 7B Instruct v0.3 model ---
+# --- Use Mistral 7B Instruct v0.3 model ---
 model_id = "mistralai/Mistral-7B-Instruct-v0.3"
 
-# --- IMPORTANT FIX: Pass token to tokenizer as well ---
 tokenizer = AutoTokenizer.from_pretrained(model_id, token=HUGGINGFACEHUB_API_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
-    device_map="auto",
+    device_map="auto",  # 'auto' handles device placement, including offloading
     torch_dtype=torch.bfloat16,
     trust_remote_code=True,
-    token=HUGGINGFACEHUB_API_TOKEN  # Token is already passed here
+    token=HUGGINGFACEHUB_API_TOKEN
 )
 
-if torch.backends.mps.is_available():
-    device = "mps"
-elif torch.cuda.is_available():
-    device = "cuda"
-else:
-    device = "cpu"
-
-model.to(device)
+# --- REMOVED: model.to(device) ---
+# When device_map="auto" is used, accelerate handles device placement.
+# Manually moving the model can cause conflicts and RuntimeErrors.
+# if torch.backends.mps.is_available():
+#     device = "mps"
+# elif torch.cuda.is_available():
+#     device = "cuda"
+# else:
+#     device = "cpu"
+# model.to(device)  # This line is removed
 
 # k=5 means it will keep the last 5 human-AI interaction pairs (10 messages total)
 memory = ConversationBufferWindowMemory(k=5)
@@ -65,7 +61,7 @@ llm = HuggingFacePipeline(pipeline=pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
-    max_new_tokens=512,  # Allows for longer, detailed answers when required
+    max_new_tokens=512,
     return_full_text=True,
     temperature=0.2,
     do_sample=True,