Spaces: Runtime error
Commit 762b3c6 · 1 Parent(s): 5aaa5d5
changes
Files changed:
- Dockerfile +11 -15
- app/main.py +13 -8
Dockerfile
CHANGED
@@ -1,28 +1,24 @@
-# Use
+# Use official Python image
 FROM python:3.9-slim
 
 # Set environment variables
 ENV PYTHONUNBUFFERED=1 \
-    PYTHONDONTWRITEBYTECODE=1
-
-#
-ENV HF_HOME="/app/huggingface_cache"
-
-# Ensure the cache directory exists and is writable
+    PYTHONDONTWRITEBYTECODE=1 \
+    HF_HOME="/app/huggingface_cache" \
+    TRANSFORMERS_CACHE="/app/huggingface_cache"
+
+# Create cache directory
 RUN mkdir -p $HF_HOME && chmod -R 777 $HF_HOME
 
-#
+# Copy app files
 WORKDIR /app
+COPY . .
 
-#
-COPY requirements.txt .
+# Install dependencies
 RUN pip install --no-cache-dir -r requirements.txt
 
-#
-COPY . /app
-
-# Expose the FastAPI port
+# Expose FastAPI port
 EXPOSE 8000
 
 # Run FastAPI
-CMD ["uvicorn", "
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
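The rewritten Dockerfile folds the cache variables into a single ENV layer and copies the source before installing dependencies. Note that ordering COPY . . ahead of pip install means any code change invalidates the dependency layer; the previous COPY requirements.txt . first pattern caches better. Once the image is built and running (e.g. docker run -p 8000:8000 <image>), the root route added in app/main.py can be probed; a minimal sketch, assuming the requests package is available on the host:

import requests

# The "/" handler in app/main.py returns a small JSON greeting,
# so a 200 here confirms uvicorn started inside the container.
resp = requests.get("http://localhost:8000/")
resp.raise_for_status()
print(resp.json())  # expected: {"detail": "Welcome to FastAPI TextGen API!"}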
app/main.py
CHANGED
@@ -1,7 +1,12 @@
+import os
+import torch
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from transformers import AutoModelForCausalLM, AutoTokenizer
-
+
+# Set Hugging Face cache directory (to avoid permission issues in Docker)
+os.environ["HF_HOME"] = "/app/huggingface_cache"
+os.environ["TRANSFORMERS_CACHE"] = "/app/huggingface_cache"
 
 app = FastAPI()
 
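One caveat on this hunk: transformers resolves its cache paths from HF_HOME/TRANSFORMERS_CACHE when the module is first imported, so assigning os.environ after the from transformers import ... line may arrive too late to take effect in some library versions (the Dockerfile's ENV lines already cover it either way). A minimal sketch of the import-order-safe variant, assuming the same paths:

import os

# Export the cache location before transformers is imported, so the
# library picks it up when it initializes its cache constants.
os.environ["HF_HOME"] = "/app/huggingface_cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/huggingface_cache"

import torch
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer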
@@ -10,25 +15,25 @@ class TextGenerationRequest(BaseModel):
     max_length: int = 100
     temperature: float = 0.7
 
-# Load model and tokenizer (
+# Load model and tokenizer (Force CPU)
 model_name = "unsloth/Qwen2.5-7B-bnb-4bit"
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     trust_remote_code=True,
-    torch_dtype=torch.float32, #
-    device_map="cpu"  # Force CPU
+    torch_dtype=torch.float32,  # Use float32 for CPU
+    device_map="cpu"  # Force CPU
 )
 
-
-@app.get("/", tags=["Home"])
+@app.get("/")
 def api_home():
-    return {
+    return {"detail": "Welcome to FastAPI TextGen API!"}
 
 @app.post("/generate")
 async def generate_text(request: TextGenerationRequest):
     try:
-        inputs = tokenizer(request.prompt, return_tensors="pt").to("cpu")  #
+        inputs = tokenizer(request.prompt, return_tensors="pt").to("cpu")  # Ensure CPU usage
         outputs = model.generate(
             inputs.input_ids,
             max_length=request.max_length,
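For completeness, a hypothetical client call against the /generate endpoint shown above; the field names follow the TextGenerationRequest model and the handler's request.prompt access, while the host, port, prompt value, and timeout are assumptions:

import requests

payload = {
    "prompt": "Once upon a time",  # consumed as request.prompt in the handler
    "max_length": 100,             # default declared on TextGenerationRequest
    "temperature": 0.7,            # default declared on TextGenerationRequest
}

# A 7B model generating on CPU can be slow, hence the generous timeout.
resp = requests.post("http://localhost:8000/generate", json=payload, timeout=600)
print(resp.json())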
|