affandes committed on
Commit 200755e · verified · 1 parent: f5e7381

Update app.py

Files changed (1)
  1. app.py +12 -30
app.py CHANGED
@@ -1,33 +1,15 @@
- from transformers import AutoTokenizer
- from vllm import LLM, SamplingParams
-
- # Initialize the tokenizer
- tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
-
- # Pass the default decoding hyperparameters of Qwen2.5-7B-Instruct
- # max_tokens is for the maximum length for generation.
- sampling_params = SamplingParams(temperature=0.7, top_p=0.8, repetition_penalty=1.05, max_tokens=512)
-
- # Input the model name or path. Can be GPTQ or AWQ models.
- llm = LLM(model="Qwen/Qwen2.5-7B-Instruct")
-
- # Prepare your prompts
- prompt = "Tell me something about large language models."
- messages = [
-     {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
-     {"role": "user", "content": prompt}
- ]
- text = tokenizer.apply_chat_template(
-     messages,
-     tokenize=False,
-     add_generation_prompt=True
- )
-
- # generate outputs
- outputs = llm.generate([text], sampling_params)
-
- # Print the outputs.
- for output in outputs:
-     prompt = output.prompt
-     generated_text = output.outputs[0].text
-     print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from chat_qwen import get_response
+
+ app = FastAPI()
+
+ # Request model
+ class Prompt(BaseModel):
+     text: str
+
+ # Route that forwards the prompt to the model
+ @app.post("/chat")
+ async def chat(prompt: Prompt):
+     result = get_response(prompt.text)
+     return {"ask": prompt.text, "answer": result}
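The new app.py imports get_response from a chat_qwen module that is not part of this commit. A minimal sketch of what that helper could look like, reusing the vLLM setup from the removed code; everything here except the get_response name and the vLLM calls is an assumption:

# chat_qwen.py -- hypothetical helper module, not part of this commit;
# it reuses the vLLM setup from the code removed above.
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Load the tokenizer, model, and sampling settings once at import time
# so every request reuses them instead of reloading the model
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
llm = LLM(model="Qwen/Qwen2.5-7B-Instruct")
sampling_params = SamplingParams(temperature=0.7, top_p=0.8, repetition_penalty=1.05, max_tokens=512)

def get_response(prompt: str) -> str:
    # Wrap the user prompt in the Qwen chat template
    messages = [
        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # Generate a single completion and return its text
    outputs = llm.generate([text], sampling_params)
    return outputs[0].outputs[0].text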
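With both files in place, the /chat route can be exercised locally; a quick client sketch, assuming the app is served with uvicorn on port 8000:

# Start the server first, e.g.: uvicorn app:app --port 8000
import requests

resp = requests.post(
    "http://localhost:8000/chat",
    json={"text": "Tell me something about large language models."},
)
# The route returns {"ask": <prompt>, "answer": <generated text>}
print(resp.json()["answer"])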