QWEN-Chat-QA-0.5B

Running

App Files Files Community

harry85 committed on Jul 4

Commit

e3f4a50

•

1 Parent(s): 2a6e87a

Upload 4 files

Browse files

Files changed (4) hide show

Dockerfile +27 -0
README.md +4 -4
app.py +57 -0
requirements.txt +7 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,27 @@

+# Use the official Python 3.9 image
+FROM python:3.9
+# Set the working directory to /code
+WORKDIR /code
+# Copy only requirements.txt into /code (the rest of the source is copied further below)
+COPY ./requirements.txt /code/requirements.txt
+# Install the dependencies listed in requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Set up a new user named "user" with user ID 1000
+RUN useradd -m -u 1000 user
+# Switch to the "user" user; every following instruction runs as this user
+USER user
+# Set HOME to the user's home directory and put the user's local bin directory first on PATH
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+# Set the working directory to the app directory inside the user's home directory
+WORKDIR $HOME/app
+# Copy the current directory contents into the container at $HOME/app setting the owner to the user
+COPY --chown=user . $HOME/app
+# Serve the `app` object from app.py with uvicorn, listening on all interfaces on port 7860
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md CHANGED Viewed

@@ -1,8 +1,8 @@
 ---
-title: QWEN Chat QA 0.5B
-emoji: 💻
-colorFrom: red
-colorTo: green
 sdk: docker
 pinned: false
 license: mit

 ---
+title: Text Generation
+emoji: 🌍
+colorFrom: green
+colorTo: yellow
 sdk: docker
 pinned: false
 license: mit

app.py ADDED Viewed

	@@ -0,0 +1,57 @@

+# Install the necessary packages
+# pip install accelerate transformers fastapi pydantic torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+from pydantic import BaseModel
+from fastapi import FastAPI
+# Initialize the FastAPI app; the interactive API docs are served at the root path
+app = FastAPI(docs_url="/")
+# Hugging Face model repository used for both the model weights and the tokenizer
+MODEL_ID = "Qwen/Qwen1.5-0.5B-Chat"
+# Device the input tensors are moved to. Fall back to the CPU when no GPU is
+# available instead of assuming CUDA, so the app also runs on CPU-only hosts.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Load the model and tokenizer once at startup
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID,
+    torch_dtype="auto",
+    device_map="auto",
+)
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+# Define the request model: the JSON body accepted by the POST /prompt endpoint
+class RequestModel(BaseModel):
+    # The prompt text; it is sent to the model as the "user" chat message
+    input: str
+# Define a greeting endpoint that returns a fixed status message
+# NOTE(review): the app is created with docs_url="/", so the docs page is also
+# registered on "/" - confirm this route is actually reachable, or move one of them
+@app.get("/")
+def greet_json():
+    return {"message": "working..."}
+# Define the text generation endpoint
+@app.post("/prompt")
+def get_response(request: RequestModel):
+    """Generate a chat reply for the prompt in the request body.
+
+    The prompt is wrapped in a system + user conversation, rendered with the
+    tokenizer's chat template, and completed with up to 512 new tokens.
+
+    Returns a dict of the form {"generated_text": <reply>}.
+    """
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": request.input},
+    ]
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True,
+    )
+    # Move the inputs to wherever the model was actually placed (device_map="auto"
+    # may choose the CPU) rather than assuming a CUDA device is present.
+    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+    # Unpack the full encoding so the attention mask is passed along with the ids
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=512,
+    )
+    # generate() returns prompt + completion; drop the prompt tokens before decoding
+    prompt_length = model_inputs.input_ids.shape[1]
+    response = tokenizer.decode(
+        generated_ids[0][prompt_length:],
+        skip_special_tokens=True,
+    )
+    return {"generated_text": response}
+# To run the FastAPI app, use the command: uvicorn <filename>:app --reload

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+# NOTE(review): the Qwen1.5 model card asks for transformers>=4.37 - confirm that
+# the versions resolved by "transformers==4.*" and "torch==1.11.*" satisfy this
+# and are compatible with each other
+fastapi==0.74.*
+requests==2.27.*
+uvicorn[standard]==0.17.*
+sentencepiece==0.1.*
+torch==1.11.*
+transformers==4.*
+accelerate