arijitskiplegal committed on
Commit
a88f919
·
1 Parent(s): 26f1054

added files

Browse files
Files changed (3) hide show
  1. app.py +45 -0
  2. dockerfile.dockerfile +22 -0
  3. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
import os
import subprocess

# Local path for the quantized Mistral GGUF model and its upstream URL.
MODEL_PATH = "./model/mistral-7b-instruct-v0.1.Q4_K_M.gguf"
MODEL_URL = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf"

# Download the model on first start. Fetch into a ".part" temp file and
# atomically rename into place so an interrupted download never leaves a
# truncated file at MODEL_PATH — the exists() check below would otherwise
# silently reuse the corrupt file on the next start.
os.makedirs("model", exist_ok=True)
if not os.path.exists(MODEL_PATH):
    print("Downloading model...")
    partial_path = MODEL_PATH + ".part"
    subprocess.run(["wget", MODEL_URL, "-O", partial_path], check=True)
    os.replace(partial_path, MODEL_PATH)

# Load the model once at import time so every request reuses the same
# in-memory instance.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,      # context window size in tokens
    n_threads=2,     # CPU threads used for inference
    n_batch=64,      # prompt-evaluation batch size
    use_mlock=True,  # lock weights in RAM to avoid swapping
)
24
+
25
# FastAPI app
app = FastAPI(title="Mistral GGUF LLM API", version="1.0.0")

class InferenceRequest(BaseModel):
    # Prompt text passed verbatim to the model.
    prompt: str
    # Cap on generated tokens; defaults to 256.
    max_tokens: int = 256

class InferenceResponse(BaseModel):
    # Generated completion text; on failure the /infer handler puts the
    # error message here instead of raising.
    output: str
35
@app.post("/infer", response_model=InferenceResponse)
def infer(req: InferenceRequest):
    """Run the prompt through the loaded model and return the generated text.

    Any failure is reported inside the ``output`` field (the endpoint still
    responds 200 rather than raising an HTTP error).
    """
    try:
        completion = llm(req.prompt, max_tokens=req.max_tokens, stop=["</s>"])
        text = completion["choices"][0]["text"].strip()
    except Exception as exc:
        return InferenceResponse(output=f"Error generating response: {str(exc)}")
    return InferenceResponse(output=text)
42
+
43
@app.get("/")
def health():
    """Liveness probe; also points visitors at the interactive API docs."""
    status_message = "LLM is running. Visit /docs for Swagger UI"
    return {"status": status_message}
dockerfile.dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Set working directory
WORKDIR /code

# Install basic OS tools (wget is used by app.py to fetch the model)
RUN apt-get update && apt-get install -y --no-install-recommends wget \
    && rm -rf /var/lib/apt/lists/*

# Install Python deps first so this slow layer is cached and only rebuilt
# when requirements.txt changes, not on every source edit.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application
COPY . .

# Make start.sh executable
# NOTE(review): start.sh is not among this commit's files — confirm it
# exists in the build context, otherwise this step fails the build.
RUN chmod +x /code/start.sh

# Expose FastAPI port
EXPOSE 7860

# Start app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
python-multipart
fastapi
pydantic
uvicorn
requests
python-dotenv
# app.py does `from llama_cpp import Llama`; that module is provided by
# the llama-cpp-python package, which was missing from this list.
llama-cpp-python
ctransformers