Spaces:

pathakDev10
/

EstateGuru

Sleeping

pathakDev10 committed on Mar 25

Commit

0d57a92

1 Parent(s): af3021b

fix upload

Files changed (2) hide show

Dockerfile CHANGED Viewed

@@ -10,5 +10,6 @@ WORKDIR /app
 COPY --chown=user ./requirements.txt requirements.txt
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
 COPY --chown=user . /app
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

 COPY --chown=user ./requirements.txt requirements.txt
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
+RUN wget https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf -O qwen2.5-1.5b-instruct-q4_k_m.gguf
 COPY --chown=user . /app
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py CHANGED Viewed

@@ -60,6 +60,7 @@ class ChatQwen:
                 n_ctx=2048,
                 n_threads=4,  # Adjust as needed
                 batch_size=512,
             )
     def build_prompt(self, messages: list) -> str:
@@ -170,14 +171,14 @@ class WebSocketStreamingCallbackHandler(BaseCallbackHandler):
 # ------------------------ Instantiate the LLM ------------------------
 # Choose one mode: local (set use_server=False) or server (set use_server=True).
 model_path="qwen2.5-1.5b-instruct-q4_k_m.gguf"
-# llm = ChatQwen(
-#     temperature=0.3,
-#     streaming=True,
-#     max_new_tokens=512,
-#     use_server=False,
-#     model_path=model_path,
-#     # server_url="http://localhost:8000"  # Uncomment and set if using server mode.
-# )
 # ------------------------ FAISS and Sentence Transformer Setup ------------------------

                 n_ctx=2048,
                 n_threads=4,  # Adjust as needed
                 batch_size=512,
+                verbose=False,
             )
     def build_prompt(self, messages: list) -> str:
 # ------------------------ Instantiate the LLM ------------------------
 # Choose one mode: local (set use_server=False) or server (set use_server=True).
 model_path="qwen2.5-1.5b-instruct-q4_k_m.gguf"
+llm = ChatQwen(
+    temperature=0.3,
+    streaming=True,
+    max_new_tokens=512,
+    use_server=False,
+    model_path=model_path,
+    # server_url="http://localhost:8000"  # Uncomment and set if using server mode.
+)
 # ------------------------ FAISS and Sentence Transformer Setup ------------------------