dharmendra committed
Commit 73ab258 · 1 Parent(s): 5343cd4

Switched to Mistral 7B Instruct v0.3 model

Files changed (1)
  1. app.py +5 -9
app.py CHANGED
@@ -10,7 +10,6 @@ from langchain.prompts import PromptTemplate
 from starlette.responses import StreamingResponse
 import asyncio
 import json
-# Corrected import: 'llms' instead of 'llls'
 from langchain_community.llms import HuggingFacePipeline
 import uvicorn
 
@@ -22,14 +21,14 @@ HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
 if HUGGINGFACEHUB_API_TOKEN is None:
     raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable not set.")
 
-# --- UPDATED: Use Llama 3.1 8B Instruct model ---
-model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
+# --- UPDATED: Use Mistral 7B Instruct v0.3 model ---
+model_id = "mistralai/Mistral-7B-Instruct-v0.3"
 
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     device_map="auto",
-    torch_dtype=torch.bfloat16,  # torch.bfloat16 is generally good for Llama; try torch.float16 if issues arise
+    torch_dtype=torch.bfloat16,  # torch.bfloat16 is generally good; try torch.float16 if issues arise with Mistral
     trust_remote_code=True,
     token=HUGGINGFACEHUB_API_TOKEN
 )
@@ -52,16 +51,13 @@ llm = HuggingFacePipeline(pipeline=pipeline(
     model=model,
     tokenizer=tokenizer,
     max_new_tokens=512,  # Allows for longer, detailed answers when required
-    # --- IMPORTANT FIX: Set return_full_text to True and handle slicing manually ---
-    return_full_text=True,
+    return_full_text=True,  # Important for manual slicing of the AI's response
     temperature=0.2,  # Controls randomness (0.0 for deterministic, 1.0 for very creative)
     do_sample=True,  # Enable sampling for more varied outputs
-    # --- IMPORTANT FIX: REMOVED stop_sequence from pipeline initialization ---
-    # This prevents the TypeError; stopping is handled manually below.
 ))
 
 # --- UPDATED PROMPT TEMPLATE ---
-# Using the recommended chat format for Llama models and explicit instructions.
+# The Llama-style chat format with <|im_start|> and <|im_end|> is generally compatible with Mistral Instruct models.
 template = """<|im_start|>system
 You are a concise and direct AI assistant named Siddhi.
 You strictly avoid asking any follow-up questions.
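
A caveat on the new template comment: Mistral Instruct models are trained on the [INST] ... [/INST] format, and <|im_start|>/<|im_end|> are not special tokens in Mistral's vocabulary, so the ChatML markers are treated as plain text. A more robust alternative is to let the tokenizer build the prompt with its bundled chat template. The sketch below is not part of this commit; it folds the system instructions into the user turn, since Mistral chat templates may not accept a separate system role:

messages = [{"role": "user",
             "content": "You are a concise and direct AI assistant named Siddhi. What is LangChain?"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
# Produces the [INST] ... [/INST] wrapping that Mistral-7B-Instruct-v0.3 expects.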
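With return_full_text=True, the pipeline output begins with the prompt itself, so the handler has to slice the generated answer out manually, as the diff comment says. A minimal sketch of that slicing; the helper name and the end-of-turn marker handling are illustrative, not taken from this commit:

def extract_answer(full_text: str, prompt: str) -> str:
    # The pipeline echoes the prompt when return_full_text=True, so drop that prefix.
    answer = full_text[len(prompt):] if full_text.startswith(prompt) else full_text
    # Trim at the template's end-of-turn marker, if the model emitted one.
    return answer.split("<|im_end|>")[0].strip()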
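Since stop_sequence was removed from the pipeline kwargs (it raised a TypeError), stopping can instead be enforced through transformers' StoppingCriteria at generation time. The class below is a hypothetical sketch of that approach, not code from this commit:

from transformers import StoppingCriteria, StoppingCriteriaList

class StopOnSubstring(StoppingCriteria):
    # Illustrative helper: halts generation once a stop string appears in the output tail.
    def __init__(self, tokenizer, stop_string: str):
        self.tokenizer = tokenizer
        self.stop_string = stop_string

    def __call__(self, input_ids, scores, **kwargs):
        # Decode only the last few tokens; enough to cover the stop string.
        tail = self.tokenizer.decode(input_ids[0][-10:], skip_special_tokens=False)
        return self.stop_string in tail

# Would be passed through the pipeline call, e.g.:
# stopping_criteria=StoppingCriteriaList([StopOnSubstring(tokenizer, "<|im_end|>")])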