Gokulavelan committed
Commit 762b3c6 · 1 Parent(s): 5aaa5d5
Files changed (2)
  1. Dockerfile +11 -15
  2. app/main.py +13 -8
Dockerfile CHANGED
@@ -1,28 +1,24 @@
- # Use the official Python image
+ # Use official Python image
  FROM python:3.9-slim
 
  # Set environment variables
  ENV PYTHONUNBUFFERED=1 \
-     PYTHONDONTWRITEBYTECODE=1
-
- # Set Hugging Face cache directory to a writable location inside the container
- ENV HF_HOME="/app/huggingface_cache"
+     PYTHONDONTWRITEBYTECODE=1 \
+     HF_HOME="/app/huggingface_cache" \
+     TRANSFORMERS_CACHE="/app/huggingface_cache"
 
- # Ensure the cache directory exists and is writable
+ # Create cache directory
  RUN mkdir -p $HF_HOME && chmod -R 777 $HF_HOME
 
- # Set working directory inside the container
+ # Copy app files
  WORKDIR /app
+ COPY . .
 
- # Copy and install dependencies
- COPY requirements.txt .
+ # Install dependencies
  RUN pip install --no-cache-dir -r requirements.txt
 
- # Copy the entire project (including main.py in root)
- COPY . /app
-
- # Expose the FastAPI port
+ # Expose FastAPI port
  EXPOSE 8000
 
- # Run FastAPI (use `main` instead of `app.main`)
- CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
+ # Run FastAPI
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
app/main.py CHANGED
@@ -1,7 +1,12 @@
+ import os
+ import torch
  from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel
  from transformers import AutoModelForCausalLM, AutoTokenizer
- import torch
+
+ # Set Hugging Face cache directory (to avoid permission issues in Docker)
+ os.environ["HF_HOME"] = "/app/huggingface_cache"
+ os.environ["TRANSFORMERS_CACHE"] = "/app/huggingface_cache"
 
  app = FastAPI()
 
@@ -10,25 +15,25 @@ class TextGenerationRequest(BaseModel):
      max_length: int = 100
      temperature: float = 0.7
 
- # Load model and tokenizer (force CPU usage)
+ # Load model and tokenizer (Force CPU)
  model_name = "unsloth/Qwen2.5-7B-bnb-4bit"
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+
  model = AutoModelForCausalLM.from_pretrained(
      model_name,
      trust_remote_code=True,
-     torch_dtype=torch.float32,  # Change to float32 for CPU
-     device_map="cpu"  # Force CPU usage
+     torch_dtype=torch.float32,  # Use float32 for CPU
+     device_map="cpu"  # Force CPU
  )
 
-
- @app.get("/", tags=["Home"])
+ @app.get("/")
  def api_home():
-     return {'detail': 'Welcome to FastAPI TextGen Tutorial!'}
+     return {"detail": "Welcome to FastAPI TextGen API!"}
 
  @app.post("/generate")
  async def generate_text(request: TextGenerationRequest):
      try:
-         inputs = tokenizer(request.prompt, return_tensors="pt").to("cpu")  # Move to CPU
+         inputs = tokenizer(request.prompt, return_tensors="pt").to("cpu")  # Ensure CPU usage
          outputs = model.generate(
              inputs.input_ids,
              max_length=request.max_length,
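Note: once the container is up, the /generate endpoint can be smoke-tested from any HTTP client. A minimal client sketch, assuming the requests package is available and that the request body fields match the TextGenerationRequest model above:

# Hypothetical client-side smoke test; not part of this commit.
import requests

resp = requests.post(
    "http://localhost:8000/generate",
    json={"prompt": "Hello, world", "max_length": 50, "temperature": 0.7},
    timeout=300,  # CPU generation with a 7B model can be very slow
)
resp.raise_for_status()
print(resp.json())

One caveat worth verifying: unsloth/Qwen2.5-7B-bnb-4bit is a bitsandbytes 4-bit checkpoint, and bitsandbytes kernels generally expect a CUDA device, so loading it with device_map="cpu" and torch_dtype=torch.float32 may fail outright on a CPU-only machine.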