dharmendra committed
Commit 73ab258 · 1 Parent(s): 5343cd4

Switched to Mistral 7B Instruct v0.3 model

Files changed (1)
  1. app.py +5 -9
app.py CHANGED
@@ -10,7 +10,6 @@ from langchain.prompts import PromptTemplate
 from starlette.responses import StreamingResponse
 import asyncio
 import json
-# Corrected import: 'llms' instead of 'llls'
 from langchain_community.llms import HuggingFacePipeline
 import uvicorn
 
@@ -22,14 +21,14 @@ HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
 if HUGGINGFACEHUB_API_TOKEN is None:
     raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable not set.")
 
-# --- UPDATED: Use Llama 3.1 8B Instruct model ---
-model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+# --- UPDATED: Use Mistral 7B Instruct v0.3 model ---
+model_id = "mistralai/Mistral-7B-Instruct-v0.3"
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
-    torch_dtype=torch.bfloat16,  # torch.bfloat16 is generally good for Llama; try torch.float16 if issues arise
+    torch_dtype=torch.bfloat16,  # torch.bfloat16 is generally good; try torch.float16 if issues arise with Mistral
     trust_remote_code=True,
     token=HUGGINGFACEHUB_API_TOKEN
 )
@@ -52,16 +51,13 @@ llm = HuggingFacePipeline(pipeline=pipeline(
     model=model,
     tokenizer=tokenizer,
     max_new_tokens=512,  # Allows for longer, detailed answers when required
-    # --- IMPORTANT FIX: Set return_full_text to True and handle slicing manually ---
-    return_full_text=True,
+    return_full_text=True,  # Important for manual slicing of the AI's response
     temperature=0.2,  # Controls randomness (0.0 for deterministic, 1.0 for very creative)
     do_sample=True,  # Enable sampling for more varied outputs
-    # --- IMPORTANT FIX: REMOVED stop_sequence from pipeline initialization ---
-    # This prevents the TypeError; stopping is handled manually below.
 ))
 
 # --- UPDATED PROMPT TEMPLATE ---
-# Using the recommended chat format for Llama models and explicit instructions.
+# The Llama-style chat format with <|im_start|> and <|im_end|> is generally compatible with Mistral Instruct models.
 template = """<|im_start|>system
 You are a concise and direct AI assistant named Siddhi.
 You strictly avoid asking any follow-up questions.
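
A caveat on the new template comment: Mistral Instruct models are trained on the [INST] ... [/INST] format, and <|im_start|>/<|im_end|> are not special tokens in Mistral's vocabulary, so the ChatML markers are treated as plain text. A more robust alternative is to let the tokenizer build the prompt with its bundled chat template. The sketch below is not part of this commit; it folds the system instructions into the user turn, since Mistral chat templates may not accept a separate system role:

messages = [{"role": "user",
             "content": "You are a concise and direct AI assistant named Siddhi. What is LangChain?"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# Produces the [INST] ... [/INST] wrapping that Mistral-7B-Instruct-v0.3 expects.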
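With return_full_text=True, the pipeline output begins with the prompt itself, so the handler has to slice the generated answer out manually, as the diff comment says. A minimal sketch of that slicing; the helper name and the end-of-turn marker handling are illustrative, not taken from this commit:

def extract_answer(full_text: str, prompt: str) -> str:
    # The pipeline echoes the prompt when return_full_text=True, so drop that prefix.
    answer = full_text[len(prompt):] if full_text.startswith(prompt) else full_text
    # Trim at the template's end-of-turn marker, if the model emitted one.
    return answer.split("<|im_end|>")[0].strip()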
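Since stop_sequence was removed from the pipeline kwargs (it raised a TypeError), stopping can instead be enforced through transformers' StoppingCriteria at generation time. The class below is a hypothetical sketch of that approach, not code from this commit:

from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnSubstring(StoppingCriteria):
    # Illustrative helper: halts generation once a stop string appears in the output tail.
    def __init__(self, tokenizer, stop_string: str):
        self.tokenizer = tokenizer
        self.stop_string = stop_string

    def __call__(self, input_ids, scores, **kwargs):
        # Decode only the last few tokens; enough to cover the stop string.
        tail = self.tokenizer.decode(input_ids[0][-10:], skip_special_tokens=False)
        return self.stop_string in tail

# Would be passed through the pipeline call, e.g.:
# stopping_criteria=StoppingCriteriaList([StopOnSubstring(tokenizer, "<|im_end|>")])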