change to ollama
Files changed:
- Dockerfile                  +16 -6
- Dockerfile.copy             +33 -0
- app.py                       +2 -1
- routers/get_chatrespone.py  +83 -0
- start.sh                    +34 -0
Dockerfile (CHANGED)

@@ -4,8 +4,17 @@
 # Use the official Python 3.10.9 image
 FROM python:3.12.1

+# Install curl and Ollama
+RUN apt-get update && apt-get install -y curl && \
+    curl -fsSL https://ollama.ai/install.sh | sh && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
 # RUN apt-get update -qq && apt-get install ffmpeg -y
-WORKDIR /app

 # Copy the current directory contents into the container at .
 COPY . .
@@ -14,10 +23,7 @@ COPY . .
 # Install requirements.txt
 RUN pip install -r requirements.txt

-
-USER user
-ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:$PATH
+

 WORKDIR $HOME/app

@@ -29,5 +35,9 @@ COPY --chown=user . $HOME/app

 EXPOSE 7860

+# Make the start script executable
+RUN chmod +x start.sh
+
 # Start the FastAPI app on port 7860, the default port expected by Spaces
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--reload"]
+CMD ["./start.sh"]
+# CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--reload"]
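The updated image now installs Ollama and hands startup over to start.sh. A rough way to verify the change outside of Spaces is to build and run the image locally; the image tag below is only a placeholder, not part of the commit:

```bash
# Hypothetical local smoke test; "ollama-chat-space" is a placeholder image name.
docker build -t ollama-chat-space .
docker run --rm -p 7860:7860 ollama-chat-space
# start.sh inside the container should launch `ollama serve`, pull llama3.2,
# and then start uvicorn on port 7860 (the port Spaces expects).
```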
Dockerfile.copy (ADDED)

@@ -0,0 +1,33 @@
+
+
+
+# Use the official Python 3.10.9 image
+FROM python:3.12.1
+
+# RUN apt-get update -qq && apt-get install ffmpeg -y
+WORKDIR /app
+
+# Copy the current directory contents into the container at .
+COPY . .
+
+
+# Install requirements.txt
+RUN pip install -r requirements.txt
+
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+WORKDIR $HOME/app
+
+# RUN mkdir -p $HOME/app/cached/hub/models--Systran--faster-whisper-small
+
+# COPY --from=model /tmp/model $HOME/app/cached/hub/models--Systran--faster-whisper-small
+
+COPY --chown=user . $HOME/app
+
+EXPOSE 7860
+
+# Start the FastAPI app on port 7860, the default port expected by Spaces
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--reload"]
app.py (CHANGED)

@@ -3,7 +3,7 @@ import os
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware

-from routers import get_transcript
+from routers import get_transcript, get_chatrespone

 os.environ['HF_HOME'] = "./cached/"

@@ -12,6 +12,7 @@ app = FastAPI()
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["Content-Type", "Authorization", "x-api-key"])

 app.include_router(get_transcript.router)
+app.include_router(get_chatrespone.router)

 @app.get("/")
 def read_root():
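With this change the chat router is mounted alongside the transcript router. Assuming the container is already running locally on port 7860, one quick way to confirm the new route is registered is to look for its prefix in FastAPI's auto-generated OpenAPI schema:

```bash
# Look for the new route in the generated schema (served by FastAPI at /openapi.json by default).
curl -s http://localhost:7860/openapi.json | grep -o '"/get-chat-response/"'
```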
routers/get_chatrespone.py (ADDED)

@@ -0,0 +1,83 @@
+import os
+from typing import Iterator
+from dotenv import load_dotenv
+from fastapi import APIRouter, Depends, Request
+
+from langchain_huggingface import HuggingFaceEndpoint
+from langchain_core.prompts import PromptTemplate
+
+from libs.header_api_auth import get_api_key
+from pydantic import BaseModel
+from fastapi.responses import StreamingResponse
+from langchain_ollama import ChatOllama, OllamaLLM
+
+load_dotenv()
+HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", )
+
+os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN
+
+router = APIRouter(prefix="/get-chat-response", tags=["chat"])
+
+class ChatInputForm(BaseModel):
+    textInput: str
+    repo_id: str
+    prompt: str
+
+@router.post("/")
+async def get_chat_respone(body: ChatInputForm):
+
+    prompt = get_prompt(body.prompt)
+
+    promptTemplate = PromptTemplate.from_template(prompt)
+    try:
+        llm = OllamaLLM(
+            model="llama3.2",
+            temperature=0.2,
+            # huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
+        )
+
+        messages = [
+            ("system", prompt),
+            ("human", body.textInput)
+        ]
+
+
+        llm_chain = promptTemplate | llm
+        response = llm.stream(messages)
+
+        return StreamingResponse(get_response(response), media_type='text/event-stream')
+    except Exception:
+        return {"success": False, "status": Exception}
+
+
+# get_chat_respone()
+
+def get_response(response: Iterator[str]):
+    for chunk in response:
+        yield chunk
+
+
+checkWritting = """You'll be provided with a text: {prompt}
+---------------
+IMPORTANT:
+- If the text is empty, do nothing.
+- If the given text maintains grammatical accuracy, no suggestions are needed.
+- If the given text is empty, do nothing.
+- If the given text contains any errors in grammatical accuracy, provide the corrected text.
+
+"""
+
+template = """You are a helpful English teacher. Chat and do user requirement.
+Answer: Let's think step by step."""
+baiGiang = """Provide the given phrase in English. Provide the correct and popularly used English phrase along with its American IPA pronunciation and a brief explanation for it. Use the correct English phrase to create 4 example sentences along with the example IPA and brief meanings. Finally, suggest 4 similar English phrases with the correct English version, along with American IPA and their brief meanings.
+Provie your response in markdown format"""
+
+def get_prompt(prompt: str):
+    prompts = {
+        'template' : template,
+        'checkWritting': checkWritting,
+        'baiGiang': baiGiang
+    }
+
+    return prompts.get('template', template)
+
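A minimal way to exercise the new endpoint, assuming the app is reachable on localhost:7860; the JSON values are illustrative, and repo_id is required by ChatInputForm even though this handler does not use it:

```bash
# Stream a chat response; -N stops curl from buffering the text/event-stream output.
curl -N -X POST http://localhost:7860/get-chat-response/ \
  -H "Content-Type: application/json" \
  -d '{"textInput": "Please correct: I has went to school yesterday.", "repo_id": "llama3.2", "prompt": "checkWritting"}'
```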
start.sh (ADDED)

@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Set environment variables for optimization
+export OMP_NUM_THREADS=4
+export MKL_NUM_THREADS=4
+export CUDA_VISIBLE_DEVICES=0
+
+# Start Ollama in the background
+ollama serve &
+
+# Pull the model if not already present
+if ! ollama list | grep -q "llama3.2:latest"; then
+    ollama pull llama3.2:latest
+fi
+
+# Wait for Ollama to start up
+max_attempts=30
+attempt=0
+while ! curl -s http://localhost:11434/api/tags >/dev/null; do
+    sleep 1
+    attempt=$((attempt + 1))
+    if [ $attempt -eq $max_attempts ]; then
+        echo "Ollama failed to start within 30 seconds. Exiting."
+        exit 1
+    fi
+done
+
+echo "Ollama is ready."
+
+# Print the API URL
+echo "API is running on: http://0.0.0.0:7860"
+
+# Start the FastAPI server
+uvicorn app:app --host 0.0.0.0 --port 7860 --workers 4 --limit-concurrency 20
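Once the script prints "Ollama is ready.", both services can be probed with the same endpoints the script itself relies on (shown here as run from inside the container):

```bash
# Probe the two servers started by start.sh.
curl -s http://localhost:11434/api/tags   # Ollama's REST API, used by the readiness loop above
curl -s http://localhost:7860/            # FastAPI root route defined in app.py
```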