Trigger82 committed · verified
Commit 7d7624b · Parent: 1a2f674

Update app.py

Files changed (1): app.py (+15 -13)
app.py CHANGED
@@ -1,23 +1,24 @@
  # app.py
- import os
+
  from transformers import AutoModelForCausalLM, AutoTokenizer
  from fastapi import FastAPI

- # Ensure cache env vars point to writable directory (same as Dockerfile)
- home = os.environ.get("HOME", "/home/user")
- cache_dir = os.path.join(home, ".cache", "huggingface")
- os.makedirs(cache_dir, exist_ok=True)
- os.environ["HF_HOME"] = cache_dir
- os.environ["TRANSFORMERS_CACHE"] = cache_dir
+ # Model ID on Hugging Face
+ MODEL_ID = "rasyosef/Phi-1_5-Instruct-v0.1"

- model_id = "rasyosef/Phi-1_5-Instruct-v0.1"
- model = AutoModelForCausalLM.from_pretrained(model_id)
- tokenizer = AutoTokenizer.from_pretrained(model_id)
+ # Load tokenizer and model from local cache (pre-downloaded in Docker build)
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+ model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

  app = FastAPI()
+
  @app.get("/chat")
  def chat(query: str):
-     # Compose chat-format prompt (system + user) for Phi-1.5
+     """
+     GET /chat?query=Your+question
+     Returns JSON: {"answer": "...model’s reply..."}
+     """
+     # Build the instruction-style prompt expected by Phi-1.5 Instruct
      prompt = (
          "<|im_start|>system\nYou are a helpful assistant.<|im_end|>"
          "<|im_start|>user\n" + query + "<|im_end|>"
@@ -25,8 +26,9 @@ def chat(query: str):
      )
      inputs = tokenizer(prompt, return_tensors="pt")
      outputs = model.generate(**inputs, max_new_tokens=200)
-     # Decode only the newly generated tokens (skip input tokens)
+     # Only decode newly generated tokens (skip the "prompt" tokens)
      response = tokenizer.decode(
-         outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True
+         outputs[0][inputs.input_ids.shape[-1]:],
+         skip_special_tokens=True
      )
      return {"answer": response.strip()}
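The comment on the new model-loading lines says the weights are expected to be pre-downloaded during the Docker build rather than fetched at startup. As a rough sketch of what that build step could look like (the script name download_model.py and its invocation from the Dockerfile are assumptions, not part of this commit):

    # download_model.py (hypothetical): warm the Hugging Face cache at image build time
    from transformers import AutoModelForCausalLM, AutoTokenizer

    MODEL_ID = "rasyosef/Phi-1_5-Instruct-v0.1"

    # Downloading once here lets from_pretrained() in app.py resolve from the local cache
    AutoTokenizer.from_pretrained(MODEL_ID)
    AutoModelForCausalLM.from_pretrained(MODEL_ID)

The Dockerfile would presumably run this script (e.g. RUN python download_model.py) after installing dependencies, so the app does not have to download the model on every cold start.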
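Once the new app.py imports cleanly, the endpoint can be checked locally with FastAPI's TestClient, without starting a separate server; the query text below is only an illustrative example:

    # Quick local check of the /chat endpoint (illustrative)
    from fastapi.testclient import TestClient

    from app import app  # importing app.py loads the model, so this may take a while

    client = TestClient(app)
    resp = client.get("/chat", params={"query": "What is FastAPI?"})
    print(resp.json())  # e.g. {"answer": "..."}

Against a deployed instance, the same call is a plain GET request to /chat?query=... on the running server.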