Trigger82 committed on
Commit
65c4e7d
·
verified ·
1 Parent(s): 222ea30

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -1,10 +1,10 @@
1
  from transformers import AutoModelForCausalLM, AutoTokenizer
2
  from fastapi import FastAPI
3
 
4
-
5
  MODEL_ID = "rasyosef/Phi-1_5-Instruct-v0.1"
6
 
7
-
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
9
  model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
10
 
@@ -16,7 +16,7 @@ def chat(query: str):
16
  GET /chat?query=Your+question
17
  Returns JSON: {"answer": "...model’s reply..."}
18
  """
19
-
20
  prompt = (
21
  "<|im_start|>system\nYou are a helpful assistant.<|im_end|>"
22
  "<|im_start|>user\n" + query + "<|im_end|>"
@@ -24,7 +24,7 @@ def chat(query: str):
24
  )
25
  inputs = tokenizer(prompt, return_tensors="pt")
26
  outputs = model.generate(**inputs, max_new_tokens=200)
27
-
28
  response = tokenizer.decode(
29
  outputs[0][inputs.input_ids.shape[-1]:],
30
  skip_special_tokens=True
 
1
  from transformers import AutoModelForCausalLM, AutoTokenizer
2
  from fastapi import FastAPI
3
 
4
+ # Model ID on Hugging Face
5
  MODEL_ID = "rasyosef/Phi-1_5-Instruct-v0.1"
6
 
7
+ # Load tokenizer and model from local cache (pre-downloaded in Docker build)
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
9
  model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
10
 
 
16
  GET /chat?query=Your+question
17
  Returns JSON: {"answer": "...model’s reply..."}
18
  """
19
+ # Build the instruction-style prompt expected by Phi-1.5 Instruct
20
  prompt = (
21
  "<|im_start|>system\nYou are a helpful assistant.<|im_end|>"
22
  "<|im_start|>user\n" + query + "<|im_end|>"
 
24
  )
25
  inputs = tokenizer(prompt, return_tensors="pt")
26
  outputs = model.generate(**inputs, max_new_tokens=200)
27
+ # Only decode newly generated tokens (skip the “prompt” tokens)
28
  response = tokenizer.decode(
29
  outputs[0][inputs.input_ids.shape[-1]:],
30
  skip_special_tokens=True