Spaces:
Paused
Paused
dharmendra
committed on
Commit
·
73ab258
1
Parent(s):
5343cd4
Switched to Mistral 7B Instruct v0.3 model
Browse files
app.py
CHANGED
@@ -10,7 +10,6 @@ from langchain.prompts import PromptTemplate
|
|
10 |
from starlette.responses import StreamingResponse
|
11 |
import asyncio
|
12 |
import json
|
13 |
-
# Corrected import: 'llms' instead of 'llls'
|
14 |
from langchain_community.llms import HuggingFacePipeline
|
15 |
import uvicorn
|
16 |
|
@@ -22,14 +21,14 @@ HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
|
|
22 |
if HUGGINGFACEHUB_API_TOKEN is None:
|
23 |
raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable not set.")
|
24 |
|
25 |
-
# --- UPDATED: Use
|
26 |
-
model_id = "
|
27 |
|
28 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
29 |
model = AutoModelForCausalLM.from_pretrained(
|
30 |
model_id,
|
31 |
device_map="auto",
|
32 |
-
torch_dtype=torch.bfloat16, # torch.bfloat16 is generally good
|
33 |
trust_remote_code=True,
|
34 |
token=HUGGINGFACEHUB_API_TOKEN
|
35 |
)
|
@@ -52,16 +51,13 @@ llm = HuggingFacePipeline(pipeline=pipeline(
|
|
52 |
model=model,
|
53 |
tokenizer=tokenizer,
|
54 |
max_new_tokens=512, # Allows for longer, detailed answers when required
|
55 |
-
|
56 |
-
return_full_text=True,
|
57 |
temperature=0.2, # Controls randomness (0.0 for deterministic, 1.0 for very creative)
|
58 |
do_sample=True, # Enable sampling for more varied outputs
|
59 |
-
# --- IMPORTANT FIX: REMOVED stop_sequence from pipeline initialization ---
|
60 |
-
# This prevents the TypeError and we handle stopping manually below.
|
61 |
))
|
62 |
|
63 |
# --- UPDATED PROMPT TEMPLATE ---
|
64 |
-
#
|
65 |
template = """<|im_start|>system
|
66 |
You are a concise and direct AI assistant named Siddhi.
|
67 |
You strictly avoid asking any follow-up questions.
|
|
|
10 |
from starlette.responses import StreamingResponse
|
11 |
import asyncio
|
12 |
import json
|
|
|
13 |
from langchain_community.llms import HuggingFacePipeline
|
14 |
import uvicorn
|
15 |
|
|
|
21 |
if HUGGINGFACEHUB_API_TOKEN is None:
|
22 |
raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable not set.")
|
23 |
|
24 |
+
# --- UPDATED: Use Mistral 7B Instruct v0.3 model ---
|
25 |
+
model_id = "mistralai/Mistral-7B-Instruct-v0.3"
|
26 |
|
27 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
28 |
model = AutoModelForCausalLM.from_pretrained(
|
29 |
model_id,
|
30 |
device_map="auto",
|
31 |
+
torch_dtype=torch.bfloat16, # torch.bfloat16 is generally good, can try torch.float16 if issues arise with Mistral
|
32 |
trust_remote_code=True,
|
33 |
token=HUGGINGFACEHUB_API_TOKEN
|
34 |
)
|
|
|
51 |
model=model,
|
52 |
tokenizer=tokenizer,
|
53 |
max_new_tokens=512, # Allows for longer, detailed answers when required
|
54 |
+
return_full_text=True, # Important for manual slicing of AI's response
|
|
|
55 |
temperature=0.2, # Controls randomness (0.0 for deterministic, 1.0 for very creative)
|
56 |
do_sample=True, # Enable sampling for more varied outputs
|
|
|
|
|
57 |
))
|
58 |
|
59 |
# --- UPDATED PROMPT TEMPLATE ---
|
60 |
+
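# NOTE: <|im_start|>/<|im_end|> is the ChatML format, not Mistral's native one; Mistral Instruct models are trained on [INST] ... [/INST], so verify output quality or build the prompt with tokenizer.apply_chat_template.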
|
61 |
template = """<|im_start|>system
|
62 |
You are a concise and direct AI assistant named Siddhi.
|
63 |
You strictly avoid asking any follow-up questions.
|