minhpng committed
Commit d428744 · 1 Parent(s): 475073b

change to ollama

Files changed (5):
  1. Dockerfile +16 -6
  2. Dockerfile.copy +33 -0
  3. app.py +2 -1
  4. routers/get_chatrespone.py +83 -0
  5. start.sh +34 -0
Dockerfile CHANGED
@@ -4,8 +4,17 @@
 # Use the official Python 3.12.1 image
 FROM python:3.12.1
 
+# Install curl and Ollama
+RUN apt-get update && apt-get install -y curl && \
+    curl -fsSL https://ollama.ai/install.sh | sh && \
+    apt-get clean && rm -rf /var/lib/apt/lists/*
+
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
 # RUN apt-get update -qq && apt-get install ffmpeg -y
-WORKDIR /app
 
 # Copy the current directory contents into the container at .
 COPY . .
@@ -14,10 +23,7 @@ COPY . .
 # Install requirements.txt
 RUN pip install -r requirements.txt
 
-RUN useradd -m -u 1000 user
-USER user
-ENV HOME=/home/user \
-    PATH=/home/user/.local/bin:$PATH
+
 
 WORKDIR $HOME/app
 
@@ -29,5 +35,9 @@ COPY --chown=user . $HOME/app
 
 EXPOSE 7860
 
+# Make the start script executable
+RUN chmod +x start.sh
+
 # Start the FastAPI app on port 7860, the default port expected by Spaces
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--reload"]
+CMD ["./start.sh"]
+# CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--reload"]
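The CMD now hands startup off to start.sh, which brings up the Ollama server before the API (the script is added below). As a quick local smoke test, one might build and run the image like this; the tag ollama-fastapi is just an illustrative name:

    docker build -t ollama-fastapi .
    docker run -p 7860:7860 ollama-fastapi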
Dockerfile.copy ADDED
@@ -0,0 +1,33 @@
+
+
+
+# Use the official Python 3.12.1 image
+FROM python:3.12.1
+
+# RUN apt-get update -qq && apt-get install ffmpeg -y
+WORKDIR /app
+
+# Copy the current directory contents into the container at .
+COPY . .
+
+
+# Install requirements.txt
+RUN pip install -r requirements.txt
+
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+
+WORKDIR $HOME/app
+
+# RUN mkdir -p $HOME/app/cached/hub/models--Systran--faster-whisper-small
+
+# COPY --from=model /tmp/model $HOME/app/cached/hub/models--Systran--faster-whisper-small
+
+COPY --chown=user . $HOME/app
+
+EXPOSE 7860
+
+# Start the FastAPI app on port 7860, the default port expected by Spaces
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--reload"]
app.py CHANGED
@@ -3,7 +3,7 @@ import os
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 
-from routers import get_transcript
+from routers import get_transcript, get_chatrespone
 
 os.environ['HF_HOME'] = "./cached/"
 
@@ -12,6 +12,7 @@ app = FastAPI()
 app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["Content-Type", "Authorization", "x-api-key"])
 
 app.include_router(get_transcript.router)
+app.include_router(get_chatrespone.router)
 
 @app.get("/")
 def read_root():
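With the router included, the new endpoint shows up in FastAPI's auto-generated schema; a quick way to confirm the include took effect on a running instance (assuming the default port from the Dockerfile):

    curl -s http://localhost:7860/openapi.json | grep -o '"/get-chat-response/"'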
routers/get_chatrespone.py ADDED
@@ -0,0 +1,77 @@
+import os
+from typing import Iterator
+
+from dotenv import load_dotenv
+from fastapi import APIRouter
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+
+from langchain_ollama import OllamaLLM
+
+from libs.header_api_auth import get_api_key
+
+load_dotenv()
+HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
+os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN
+
+router = APIRouter(prefix="/get-chat-response", tags=["chat"])
+
+
+class ChatInputForm(BaseModel):
+    textInput: str
+    repo_id: str
+    prompt: str
+
+
+@router.post("/")
+async def get_chat_respone(body: ChatInputForm):
+    prompt = get_prompt(body.prompt)
+
+    try:
+        # The model is pinned to llama3.2; body.repo_id is accepted but currently unused.
+        llm = OllamaLLM(
+            model="llama3.2",
+            temperature=0.2,
+        )
+
+        # The selected template is sent as the system prompt; the user text follows.
+        messages = [
+            ("system", prompt),
+            ("human", body.textInput),
+        ]
+
+        response = llm.stream(messages)
+        return StreamingResponse(get_response(response), media_type="text/event-stream")
+    except Exception as e:
+        return {"success": False, "status": str(e)}
+
+
+def get_response(response: Iterator[str]):
+    # Relay each streamed chunk to the client as it arrives.
+    for chunk in response:
+        yield chunk
+
+
+checkWritting = """You'll be provided with a text: {prompt}
+---------------
+IMPORTANT:
+- If the text is empty, do nothing.
+- If the given text maintains grammatical accuracy, no suggestions are needed.
+- If the given text contains any errors in grammatical accuracy, provide the corrected text.
+"""
+
+template = """You are a helpful English teacher. Chat and carry out the user's request.
+Answer: Let's think step by step."""
+
+baiGiang = """Provide the given phrase in English. Provide the correct and popularly used English phrase along with its American IPA pronunciation and a brief explanation for it. Use the correct English phrase to create 4 example sentences along with the example IPA and brief meanings. Finally, suggest 4 similar English phrases with the correct English version, along with American IPA and their brief meanings.
+Provide your response in markdown format."""
+
+
+def get_prompt(prompt: str):
+    prompts = {
+        "template": template,
+        "checkWritting": checkWritting,
+        "baiGiang": baiGiang,
+    }
+    # Look up the requested prompt by name, falling back to the default template.
+    return prompts.get(prompt, template)
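Once the container is running, the new route can be exercised with a streaming POST. A minimal sketch, assuming the service is reachable on localhost:7860; all three ChatInputForm fields are required even though repo_id is currently ignored by the handler:

    curl -N -X POST http://localhost:7860/get-chat-response/ \
      -H 'Content-Type: application/json' \
      -d '{"textInput": "I has a apple.", "prompt": "checkWritting", "repo_id": "llama3.2"}'

The -N flag turns off curl's buffering so the text/event-stream chunks print as they arrive.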
start.sh ADDED
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Set environment variables for optimization
+export OMP_NUM_THREADS=4
+export MKL_NUM_THREADS=4
+export CUDA_VISIBLE_DEVICES=0
+
+# Start Ollama in the background
+ollama serve &
+
+# Wait for Ollama to start up before talking to it
+max_attempts=30
+attempt=0
+while ! curl -s http://localhost:11434/api/tags >/dev/null; do
+    sleep 1
+    attempt=$((attempt + 1))
+    if [ $attempt -eq $max_attempts ]; then
+        echo "Ollama failed to start within 30 seconds. Exiting."
+        exit 1
+    fi
+done
+
+echo "Ollama is ready."
+
+# Pull the model if not already present
+if ! ollama list | grep -q "llama3.2:latest"; then
+    ollama pull llama3.2:latest
+fi
+
+# Print the API URL
+echo "API is running on: http://0.0.0.0:7860"
+
+# Start the FastAPI server
+uvicorn app:app --host 0.0.0.0 --port 7860 --workers 4 --limit-concurrency 20
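The readiness loop polls Ollama's tags endpoint until the server responds. The same checks can be run by hand against a live container; note that port 11434 is not published by the Dockerfile, so the first command must run from a shell inside the container:

    curl -s http://localhost:11434/api/tags   # Ollama API (inside the container)
    curl -s http://localhost:7860/            # FastAPI root, once uvicorn is up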