ABIcode23 committed
Commit 701388d
1 Parent(s): b322b4f

Upload 4 files

Files changed (4)
  1. Dockerfile +33 -0
  2. api.py +1 -0
  3. app.py +73 -0
  4. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,33 @@
+ FROM python:3.10-slim
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y --no-install-recommends \
+     build-essential \
+     git \
+     && rm -rf /var/lib/apt/lists/*
+
+ WORKDIR /code
+
+ # Copy requirements file
+ COPY ./requirements.txt /code/requirements.txt
+
+ # Upgrade pip and install requirements
+ RUN pip install --no-cache-dir --upgrade pip && \
+     pip install --no-cache-dir -r /code/requirements.txt
+
+ # Create and use non-root user
+ RUN useradd -m user
+ USER user
+
+ # Set environment variables
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH \
+     PYTHONUNBUFFERED=1
+
+ WORKDIR $HOME/app
+
+ # Copy application code
+ COPY --chown=user . $HOME/app
+
+ # Run the application
+ CMD ["python", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
api.py ADDED
@@ -0,0 +1 @@
+ from app import app
app.py ADDED
@@ -0,0 +1,73 @@
+ import os
+ import logging
+ from fastapi import FastAPI, HTTPException
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+ from peft import PeftModel, PeftConfig
+
+ # Set up logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ # Initialize FastAPI app
+ app = FastAPI()
+
+ # Global variables for model, tokenizer, and pipeline
+ model = None
+ tokenizer = None
+ pipe = None
+
+ @app.on_event("startup")
+ async def load_model():
+     global model, tokenizer, pipe
+
+     try:
+         # Get Hugging Face token from environment variable
+         hf_token = os.environ.get("HUGGINGFACE_TOKEN")
+
+         logger.info("Loading PEFT configuration...")
+         config = PeftConfig.from_pretrained("frankmorales2020/Mistral-7B-text-to-sql-flash-attention-2-dataeval")
+
+         logger.info("Loading base model...")
+         base_model = AutoModelForCausalLM.from_pretrained(
+             "mistralai/Mistral-7B-Instruct-v0.3",
+             # Pass the token if set; None falls back to any cached login
+             token=hf_token
+         )
+
+         logger.info("Loading PEFT model...")
+         model = PeftModel.from_pretrained(base_model, "frankmorales2020/Mistral-7B-text-to-sql-flash-attention-2-dataeval")
+
+         logger.info("Loading tokenizer...")
+         tokenizer = AutoTokenizer.from_pretrained(
+             "mistralai/Mistral-7B-Instruct-v0.3",
+             # Pass the token if set; None falls back to any cached login
+             token=hf_token
+         )
+
+         logger.info("Creating pipeline...")
+         pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)  # Mistral is a causal LM
+
+         logger.info("Model, tokenizer, and pipeline loaded successfully.")
+     except Exception as e:
+         logger.error(f"Error loading model or creating pipeline: {e}")
+         raise
+
+ @app.get("/")
+ def home():
+     return {"message": "Hello World"}
+
+ @app.get("/generate")
+ async def generate(text: str):
+     if not pipe:
+         raise HTTPException(status_code=503, detail="Model not loaded")
+
+     try:
+         output = pipe(text, max_new_tokens=100, num_return_sequences=1)
+         return {"output": output[0]['generated_text']}
+     except Exception as e:
+         logger.error(f"Error during text generation: {e}")
+         raise HTTPException(status_code=500, detail=f"Error during text generation: {str(e)}")
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=7860)
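
Once the image is built and the container is running (the Dockerfile starts uvicorn on port 7860, and app.py reads a HUGGINGFACE_TOKEN environment variable for the gated Mistral weights, e.g. docker run -e HUGGINGFACE_TOKEN=... -p 7860:7860 <image>), the /generate endpoint can be exercised from Python. The sketch below is a minimal, hypothetical client that is not part of this commit: it assumes the server is reachable at localhost:7860 and that the requests library is installed (it is not listed in requirements.txt); the generate_sql name and the example prompt are illustrative only.

import requests

# Hypothetical client for the GET /generate endpoint defined in app.py above.
BASE_URL = "http://localhost:7860"  # assumes a local container mapped to port 7860

def generate_sql(prompt: str) -> str:
    # /generate takes a single `text` query parameter and returns {"output": "..."}
    resp = requests.get(f"{BASE_URL}/generate", params={"text": prompt}, timeout=120)
    resp.raise_for_status()
    return resp.json()["output"]

if __name__ == "__main__":
    # Illustrative prompt; the adapter repo name indicates a text-to-SQL fine-tune
    print(generate_sql("List the names of all customers located in Canada."))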
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ fastapi==0.103.0
+ uvicorn[standard]==0.17.*
+ torch>=1.13.0
+ transformers>=4.34.0,<4.35.0
+ numpy<2
+ peft>=0.6.0,<0.7.0
+ accelerate>=0.24.1,<0.25.0
+ huggingface_hub>=0.16.4,<0.18.0
+ tokenizers>=0.14.0,<0.15.0