arijitskiplegal
committed on
Commit
·
a88f919
1
Parent(s):
26f1054
added files
Browse files
- app.py +45 -0
- dockerfile.dockerfile +22 -0
- requirements.txt +7 -0
app.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
import os
import subprocess

# Local path the server loads the model from, and the upstream GGUF to fetch.
MODEL_PATH = "./model/mistral-7b-instruct-v0.1.Q4_K_M.gguf"
MODEL_URL = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf"

# Download model if not already present.
# Fetch into a ".part" temp file and atomically rename on success: the
# previous version wrote straight to MODEL_PATH, so an interrupted wget
# left a truncated file that the exists() guard then treated as a valid
# model on every subsequent start.
os.makedirs("model", exist_ok=True)
if not os.path.exists(MODEL_PATH):
    print("Downloading model...")
    tmp_path = MODEL_PATH + ".part"
    subprocess.run(["wget", MODEL_URL, "-O", tmp_path], check=True)
    os.replace(tmp_path, MODEL_PATH)  # atomic on POSIX; no partial MODEL_PATH

# Load the model at import time so the first request does not pay the
# load cost. Settings are sized for a small CPU container.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,        # context window, in tokens
    n_threads=2,
    n_batch=64,
    use_mlock=True,    # lock model pages in RAM to avoid swapping
)

# FastAPI app
app = FastAPI(title="Mistral GGUF LLM API", version="1.0.0")
class InferenceRequest(BaseModel):
    """Request body for POST /infer."""

    # Text prompt fed verbatim to the model.
    prompt: str
    # Upper bound on the number of tokens to generate.
    max_tokens: int = 256
class InferenceResponse(BaseModel):
    """Response body for POST /infer."""

    # Generated completion text, whitespace-stripped.
    output: str
@app.post("/infer", response_model=InferenceResponse)
def infer(req: InferenceRequest):
    """Run the prompt through the loaded model and return the generated text.

    Raises HTTP 500 when generation fails. The previous version returned the
    error text inside a 200 InferenceResponse, which clients could not
    distinguish from a successful completion.
    """
    # Local import so this edit is self-contained; only FastAPI itself is
    # imported at the top of the file.
    from fastapi import HTTPException

    try:
        result = llm(req.prompt, max_tokens=req.max_tokens, stop=["</s>"])
    except Exception as e:  # llama_cpp exposes no narrower exception type
        raise HTTPException(
            status_code=500, detail=f"Error generating response: {str(e)}"
        ) from e
    return InferenceResponse(output=result["choices"][0]["text"].strip())
@app.get("/")
def health():
    """Liveness probe: reports that the API process is up and serving."""
    status_message = "LLM is running. Visit /docs for Swagger UI"
    return {"status": status_message}
dockerfile.dockerfile
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM python:3.10-slim

# Set working directory
WORKDIR /code

# Install basic OS tools (wget is invoked by app.py to fetch the model)
RUN apt-get update && apt-get install -y --no-install-recommends wget && rm -rf /var/lib/apt/lists/*

# Install Python deps first: copying only requirements.txt keeps the pip
# layer cached when application code changes (the original COPY . . before
# pip install re-ran the full install on every source edit).
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# NOTE(review): removed `RUN chmod +x /code/start.sh` — no start.sh is added
# by this commit and CMD below does not use it, so that instruction failed
# the build.

# Expose FastAPI port
EXPOSE 7860

# Start app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
fastapi
pydantic
uvicorn
requests
python-dotenv
python-multipart
# app.py does `from llama_cpp import Llama`; without this package the
# container crashes at import time.
llama-cpp-python
# NOTE(review): ctransformers is not imported by any file in this commit —
# kept to avoid removing a declared dependency, but likely removable.
ctransformers