alishafique committed
Commit f88a5b9 · verified · 1 Parent(s): a2356f0

Upload 2 files

Files changed (2)
  1. Dockerfile (2) +56 -0
  2. app (3).py +108 -0
Dockerfile (2) ADDED
@@ -0,0 +1,56 @@
+ ARG CUDA_IMAGE="12.1.1-devel-ubuntu22.04"
+ FROM nvidia/cuda:${CUDA_IMAGE}
+
+ # We need to set the host to 0.0.0.0 to allow outside access
+ ENV HOST=0.0.0.0
+
+ RUN apt-get update && apt-get upgrade -y \
+     && apt-get install -y git build-essential \
+     python3 python3-pip gcc wget \
+     ocl-icd-opencl-dev opencl-headers clinfo \
+     libclblast-dev libopenblas-dev \
+     && mkdir -p /etc/OpenCL/vendors && echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
+
+ COPY . .
+
+ # Build-related environment variables (kept for reference)
+ # ENV CUDA_DOCKER_ARCH=all
+ # ENV LLAMA_CUBLAS=1
+
+ # Sanity-check that the CUDA toolchain and Python are available
+ RUN nvcc --version && python3 --version
+
+ # Install dependencies
+ RUN python3 -m pip install --upgrade pip pytest cmake \
+     scikit-build setuptools fastapi uvicorn sse-starlette \
+     pydantic-settings starlette-context gradio huggingface_hub hf_transfer
+
+ # Install llama-cpp-python (build with CUDA)
+ # RUN CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=75" FORCE_CMAKE=1 python3 -m pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir --verbose
+ # RUN python3 -m pip install llama-cpp-python
+ # RUN python3 -m pip install llama-cpp-python \
+ #     --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu121
+
+ # Use a prebuilt CUDA 12.1 wheel instead of compiling from source
+ RUN pip install https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu121/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl
+
+ # Create a non-root user with UID 1000
+ RUN useradd -m -u 1000 user
+ # Switch to the "user" user
+ USER user
+ # Set home to the user's home directory
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH \
+     PYTHONPATH=$HOME/app \
+     PYTHONUNBUFFERED=1 \
+     GRADIO_ALLOW_FLAGGING=never \
+     GRADIO_NUM_PORTS=1 \
+     GRADIO_SERVER_NAME=0.0.0.0 \
+     GRADIO_THEME=huggingface \
+     SYSTEM=spaces
+
+ WORKDIR $HOME/app
+
+ # Copy the current directory contents into the container at $HOME/app, setting the owner to the user
+ COPY --chown=user . $HOME/app
+
+ CMD ["python3", "app.py"]
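To try the image locally rather than on a Space, the usual flow is a GPU-enabled build and run with Gradio's default port published, e.g. `docker build -t alif-chatbot .` followed by `docker run --gpus all -p 7860:7860 alif-chatbot`. The `alif-chatbot` tag and the port mapping are illustrative, not part of the commit, and the image's CMD expects the app to be present as `app.py`.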
app (3).py ADDED
@@ -0,0 +1,108 @@
+ import os
+ import json
+ import subprocess
+ import gradio as gr
+ from threading import Thread
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
+ from datetime import datetime
+
+ # Load model from Hugging Face Hub
+ MODEL_ID = "large-traversaal/Alif-1.0-8B-Instruct"
+ MODEL_FILE = "model-Q8_0.gguf"
+
+ model_path_file = hf_hub_download(MODEL_ID, filename=MODEL_FILE)
+
+ # Initialize Llama model
+ llama = Llama(
+     model_path=model_path_file,
+     n_gpu_layers=40,  # Adjust based on VRAM
+     n_threads=8,      # Match CPU cores
+     n_batch=512,      # Optimize for better VRAM usage
+     n_ctx=4096,       # Context window size
+     verbose=True      # Enable debug logging
+ )
+
+
+ # Function to generate responses
+ def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
+     # chat_prompt = f"You are an Urdu Chatbot. Write an appropriate response for the given instruction: {message} Response:"
+     chat_prompt = f"{system_prompt}\n ### Instruction: {message}\n ### Response:"
+     response = llama(chat_prompt, temperature=temperature, max_tokens=max_new_tokens, top_k=top_k, repeat_penalty=repetition_penalty, top_p=top_p, stop=["Q:", "\n"], echo=False, stream=True)
+
+     # Accumulate streamed chunks and yield the running text for the UI
+     text = ""
+     for chunk in response:
+         content = chunk["choices"][0]["text"]
+         if content:
+             text += content
+             yield text
+
+ # def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
+ #     """Generates a streaming response from the Llama model."""
+ #     messages = [
+ #         {"role": "system", "content": "You are an Urdu Chatbot. Write an appropriate response for the given instruction."},
+ #     ]
+
+ #     # Add history and the current message
+ #     # for user, bot in history:
+ #     #     messages.append({"role": "user", "content": user})
+ #     #     messages.append({"role": "assistant", "content": bot})
+
+ #     messages.append({"role": "user", "content": message})
+
+ #     response = llama.create_chat_completion(
+ #         messages=messages,
+ #         stream=True,
+ #     )
+
+ #     partial_message = ""
+ #     for part in response:
+ #         content = part["choices"][0]["delta"].get("content", "")
+ #         partial_message += content
+ #         yield partial_message
+
+
+ # JavaScript function for `on_load`
+ on_load = """
+ async()=>{ alert("Welcome to the Traversaal Alif 1.0 Chatbot! This is an experimental AI model. Please use responsibly."); }
+ """
+
+ placeholder = """
+ <center><h1>10 Questions</h1><br>Think of a person, place, or thing. I'll ask you 10 yes/no questions to try and guess it.
+ </center>
+ """
+
+ # Create custom chat UI using `gr.Blocks`
+ with gr.Blocks(js=on_load, theme=gr.themes.Default()) as demo:
+     with gr.Column(scale=1, elem_id="center-content"):
+         gr.Markdown(
+             """
+ <div style="text-align: center;">
+ <h1>Alif 1.0 Urdu & English Chatbot 🚀</h1>
+ <p>Alif 1.0 8B Instruct is an open-source model with highly advanced multilingual reasoning capabilities. It uses human-refined multilingual synthetic data paired with reasoning to enhance cultural nuance and reasoning capabilities in English and Urdu.</p>
+ </div>
+ """,
+         )
+
+     chat = gr.ChatInterface(
+         generate_response,
+         # chatbot=gr.Chatbot(placeholder=placeholder),
+         # title="🚀" + " " + "Alif-1.0 Chatbot",
+         # description="Urdu AI Chatbot powered by Llama.cpp",
+         examples=[
+             ["شہر کراچی کے بارے میں بتاؤ"],
+             ["قابل تجدید توانائی کیا ہے؟"],
+             ["پاکستان کے بارے میں بتائیں"]
+         ],
+         additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
+         additional_inputs=[
+             gr.Textbox(value="You are an Urdu Chatbot. Write an appropriate response for the given instruction in Urdu.", label="System prompt", render=False),
+             gr.Slider(0, 1, 0.8, label="Temperature", render=False),
+             gr.Slider(128, 4096, 512, label="Max new tokens", render=False),
+             gr.Slider(1, 80, 40, step=1, label="Top K sampling", render=False),
+             gr.Slider(0, 2, 1.1, label="Repetition penalty", render=False),
+             gr.Slider(0, 1, 0.95, label="Top P sampling", render=False),
+         ],
+     )
+
+ demo.queue(max_size=10).launch(share=True)
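For reference, the streaming path in generate_response can be exercised outside the Gradio UI with the same llama-cpp-python completion call. A minimal sketch, assuming the GGUF file above and an illustrative Urdu prompt (the prompt text and sampling values here are not part of the commit):

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the same quantized model the app uses
model_path = hf_hub_download("large-traversaal/Alif-1.0-8B-Instruct", filename="model-Q8_0.gguf")
llm = Llama(model_path=model_path, n_ctx=4096, n_gpu_layers=40)

# Same "### Instruction: ... ### Response:" template the app builds
prompt = "You are an Urdu Chatbot.\n ### Instruction: پاکستان کے بارے میں بتائیں\n ### Response:"

# stream=True yields chunks; each chunk carries the next text fragment
for chunk in llm(prompt, max_tokens=256, temperature=0.8, stop=["Q:", "\n"], stream=True):
    print(chunk["choices"][0]["text"], end="", flush=True)

Note that the app passes stop=["Q:", "\n"], so generation halts at the first newline; dropping "\n" from the stop list is the knob to turn if multi-line answers are wanted.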