pathakDev10 committed on
Commit
0d57a92
·
1 Parent(s): af3021b

fix upload

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -0
  2. app.py +9 -8
Dockerfile CHANGED
@@ -10,5 +10,6 @@ WORKDIR /app
10
  COPY --chown=user ./requirements.txt requirements.txt
11
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
12
 
 
13
  COPY --chown=user . /app
14
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
10
  COPY --chown=user ./requirements.txt requirements.txt
11
  RUN pip install --no-cache-dir --upgrade -r requirements.txt
12
 
13
+ RUN wget https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf -O qwen2.5-1.5b-instruct-q4_k_m.gguf
14
  COPY --chown=user . /app
15
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py CHANGED
@@ -60,6 +60,7 @@ class ChatQwen:
60
  n_ctx=2048,
61
  n_threads=4, # Adjust as needed
62
  batch_size=512,
 
63
  )
64
 
65
  def build_prompt(self, messages: list) -> str:
@@ -170,14 +171,14 @@ class WebSocketStreamingCallbackHandler(BaseCallbackHandler):
170
  # ------------------------ Instantiate the LLM ------------------------
171
  # Choose one mode: local (set use_server=False) or server (set use_server=True).
172
  model_path="qwen2.5-1.5b-instruct-q4_k_m.gguf"
173
- # llm = ChatQwen(
174
- # temperature=0.3,
175
- # streaming=True,
176
- # max_new_tokens=512,
177
- # use_server=False,
178
- # model_path=model_path,
179
- # # server_url="http://localhost:8000" # Uncomment and set if using server mode.
180
- # )
181
 
182
  # ------------------------ FAISS and Sentence Transformer Setup ------------------------
183
 
 
60
  n_ctx=2048,
61
  n_threads=4, # Adjust as needed
62
  batch_size=512,
63
+ verbose=False,
64
  )
65
 
66
  def build_prompt(self, messages: list) -> str:
 
171
  # ------------------------ Instantiate the LLM ------------------------
172
  # Choose one mode: local (set use_server=False) or server (set use_server=True).
173
  model_path="qwen2.5-1.5b-instruct-q4_k_m.gguf"
174
+ llm = ChatQwen(
175
+ temperature=0.3,
176
+ streaming=True,
177
+ max_new_tokens=512,
178
+ use_server=False,
179
+ model_path=model_path,
180
+ # server_url="http://localhost:8000" # Uncomment and set if using server mode.
181
+ )
182
 
183
  # ------------------------ FAISS and Sentence Transformer Setup ------------------------
184