affandes committed on
Commit 200755e · verified · 1 parent: f5e7381

Update app.py

Files changed (1)
  1. app.py +12 -30
app.py CHANGED
@@ -1,33 +1,15 @@
- from transformers import AutoTokenizer
- from vllm import LLM, SamplingParams
-
- # Initialize the tokenizer
- tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
-
- # Pass the default decoding hyperparameters of Qwen2.5-7B-Instruct
- # max_tokens is for the maximum length for generation.
- sampling_params = SamplingParams(temperature=0.7, top_p=0.8, repetition_penalty=1.05, max_tokens=512)
-
- # Input the model name or path. Can be GPTQ or AWQ models.
- llm = LLM(model="Qwen/Qwen2.5-7B-Instruct")
-
- # Prepare your prompts
- prompt = "Tell me something about large language models."
- messages = [
-     {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
-     {"role": "user", "content": prompt}
- ]
- text = tokenizer.apply_chat_template(
-     messages,
-     tokenize=False,
-     add_generation_prompt=True
- )
-
- # generate outputs
- outputs = llm.generate([text], sampling_params)
-
- # Print the outputs.
- for output in outputs:
-     prompt = output.prompt
-     generated_text = output.outputs[0].text
-     print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 
+ from fastapi import FastAPI
+ from pydantic import BaseModel
+ from chat_qwen import get_response
+
+ app = FastAPI()
+
+ # Request model
+ class Prompt(BaseModel):
+     text: str
+
+ # Route that forwards the prompt to the model
+ @app.post("/chat")
+ async def chat(prompt: Prompt):
+     result = get_response(prompt.text)
+     return {"ask": prompt.text, "answer": result}
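The new app.py imports get_response from a chat_qwen module that is not part of this commit. A minimal sketch of what that helper could look like, reusing the vLLM setup from the removed code; everything here except the get_response name and the vLLM calls is an assumption:

# chat_qwen.py -- hypothetical helper module, not part of this commit;
# it reuses the vLLM setup from the code removed above.
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

# Load the tokenizer, model, and sampling settings once at import time
# so every request reuses them instead of reloading the model
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
llm = LLM(model="Qwen/Qwen2.5-7B-Instruct")
sampling_params = SamplingParams(temperature=0.7, top_p=0.8, repetition_penalty=1.05, max_tokens=512)

def get_response(prompt: str) -> str:
    # Wrap the user prompt in the Qwen chat template
    messages = [
        {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # Generate a single completion and return its text
    outputs = llm.generate([text], sampling_params)
    return outputs[0].outputs[0].text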
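With both files in place, the /chat route can be exercised locally; a quick client sketch, assuming the app is served with uvicorn on port 8000:

# Start the server first, e.g.: uvicorn app:app --port 8000
import requests

resp = requests.post(
    "http://localhost:8000/chat",
    json={"text": "Tell me something about large language models."},
)
# The route returns {"ask": <prompt>, "answer": <generated text>}
print(resp.json()["answer"])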