Trigger82 committed on
Commit 80cf0d1 · verified · 1 Parent(s): 5a31152

Update app.py: swap the Phi-1.5-Instruct causal LM for the smaller MBZUAI/LaMini-Flan-T5-77M seq2seq model and drop the root endpoint

Files changed (1)
  1. app.py +8 -23
app.py CHANGED
@@ -1,39 +1,24 @@
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 from fastapi import FastAPI

-# Model ID on Hugging Face
-MODEL_ID = "rasyosef/Phi-1_5-Instruct-v0.1"

-# Load tokenizer and model from local cache (pre-downloaded in Docker build)
+MODEL_ID = "MBZUAI/LaMini-Flan-T5-77M"
+
+
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

 app = FastAPI()

-@app.get("/")
-def root():
-    return {
-        "message": "✅ Trigger82 AI API is running!",
-        "usage": "Use /chat?query=Your+question to get a response."
-    }
-
 @app.get("/chat")
 def chat(query: str):
     """
     GET /chat?query=Your+question
     Returns JSON: {"answer": "...model’s reply..."}
     """
-    # Build the instruction-style prompt expected by Phi-1.5 Instruct
-    prompt = (
-        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>"
-        "<|im_start|>user\n" + query + "<|im_end|>"
-        "<|im_start|>assistant\n"
-    )
-    inputs = tokenizer(prompt, return_tensors="pt")
-    outputs = model.generate(**inputs, max_new_tokens=200)
-    # Only decode newly generated tokens (skip the “prompt” tokens)
+    inputs = tokenizer(query, return_tensors="pt")
+    outputs = model.generate(**inputs, max_new_tokens=100)
     response = tokenizer.decode(
-        outputs[0][inputs.input_ids.shape[-1]:],
-        skip_special_tokens=True
+        outputs[0], skip_special_tokens=True
     )
     return {"answer": response.strip()}
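
Note on the decode change: with AutoModelForSeq2SeqLM, generate() returns only the decoder's output tokens, so the new code decodes outputs[0] directly; the slicing by inputs.input_ids.shape[-1] was only needed because the old causal LM echoed the prompt tokens in its output. A minimal sketch for exercising the updated endpoint, assuming the new app.py above is importable and httpx is installed (FastAPI's TestClient depends on it); the query string is illustrative:

# Minimal smoke test for the updated /chat endpoint.
# Assumption: the new app.py shown in this commit is on the import path.
from fastapi.testclient import TestClient

from app import app

client = TestClient(app)

resp = client.get("/chat", params={"query": "What is the capital of France?"})
print(resp.status_code)  # 200 on success
print(resp.json())       # {"answer": "..."} (model output varies)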