Commit dd1baa0
Parent: 0d57a92
Message: changes

Files changed:
- .gitignore +2 -1
- Dockerfile +10 -1
- __pycache__/app.cpython-312.pyc +0 -0
- app.py +32 -18
- download.py +0 -1
.gitignore CHANGED
@@ -1,2 +1,3 @@
 qwen2.5-1.5b-instruct-q4_k_m.gguf
-qwen2.5-1.5b-instruct-q5_k_m.gguf
+qwen2.5-1.5b-instruct-q5_k_m.gguf
+*.gguf
Dockerfile CHANGED
@@ -10,6 +10,15 @@ WORKDIR /app
 COPY --chown=user ./requirements.txt requirements.txt
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
 
-RUN
+RUN python -c "\
+from huggingface_hub import hf_hub_download; \
+hf_hub_download(\
+repo_id='Qwen/Qwen2.5-1.5B-Instruct-GGUF', \
+filename='qwen2.5-1.5b-instruct-q4_k_m.gguf', \
+local_dir='.', \
+local_dir_use_symlinks=False, \
+token='$HF_TOKEN'\
+)"
+
 COPY --chown=user . /app
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
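Baking the model into the image at build time avoids a cold download on first request. Note the quoting, though: $HF_TOKEN is expanded by the shell inside the double-quoted python -c string, so it must be available at build time (e.g. as an ARG or a Space build secret). The same step reads more easily as a small helper script; a minimal sketch, where the script name is hypothetical and huggingface_hub is assumed to already be pulled in by requirements.txt:

    # fetch_model.py -- hypothetical helper, equivalent to the RUN above
    import os
    from huggingface_hub import hf_hub_download

    hf_hub_download(
        repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
        filename="qwen2.5-1.5b-instruct-q4_k_m.gguf",
        local_dir=".",
        token=os.environ.get("HF_TOKEN"),  # None is fine for a public repo
    )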
__pycache__/app.cpython-312.pyc CHANGED
Binary files a/__pycache__/app.cpython-312.pyc and b/__pycache__/app.cpython-312.pyc differ
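Compiled bytecode is tracked here and re-committed on every change. Since this commit already touches .gitignore, two extra patterns would keep the cache out of future diffs (a suggestion, not part of this commit):

    __pycache__/
    *.pyc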
app.py CHANGED
@@ -20,6 +20,10 @@ from langchain_core.tools import tool
 from langchain_core.callbacks import StreamingStdOutCallbackHandler, CallbackManager
 from langchain_core.callbacks.base import BaseCallbackHandler
 
+import os
+from fastapi.responses import PlainTextResponse
+from fastapi import FastAPI, Request
+from fastapi.staticfiles import StaticFiles
 # ------------------------ Model Inference Wrapper ------------------------
 
 class ChatQwen:
@@ -57,7 +61,7 @@ class ChatQwen:
             model_path=model_path,
             temperature=self.temperature,
             # n_ctx=512,
-            n_ctx=
+            n_ctx=8192,
             n_threads=4, # Adjust as needed
             batch_size=512,
             verbose=False,
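Raising n_ctx to 8192 trades memory for prompt room: llama.cpp allocates the KV cache up front in proportion to the context length. A back-of-envelope estimate, where the architecture constants are assumptions about Qwen2.5-1.5B rather than values read from this repo:

    # Rough KV cache size; layer/head numbers are assumptions.
    n_ctx      = 8192
    n_layers   = 28    # assumed for Qwen2.5-1.5B
    n_kv_heads = 2     # assumed (grouped-query attention)
    head_dim   = 128   # assumed
    bytes_per  = 2     # fp16 K and V entries

    kv_bytes = 2 * n_layers * n_ctx * n_kv_heads * head_dim * bytes_per
    print(f"{kv_bytes / 2**20:.0f} MiB")  # ~224 MiB at these numbers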
@@ -406,8 +410,8 @@ def generate_response(state: dict) -> dict:
     messages.append({"role": "system", "content": "When responding, use only the provided property details."})
 
     # Add conversation history
-    # Truncate conversation history (last
-    truncated_history = state.get("messages", [])[-
+    # Truncate conversation history (last 4 exchanges)
+    truncated_history = state.get("messages", [])[-8:] # Last 4 user+assistant pairs
     for msg in truncated_history:
         messages.append({"role": msg["role"], "content": msg["content"]})
 
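The [-8:] slice is safe on short histories too, since Python slicing clamps instead of raising; a quick check:

    msgs = [{"role": "user", "content": "hi"}]  # fewer than 8 messages
    assert msgs[-8:] == msgs                    # slice clamps, no IndexError
    assert len(list(range(20))[-8:]) == 8       # long history keeps the last 8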
@@ -689,7 +693,7 @@ def stream_query(query: str, connection_id: str, loop):
         # Always update current_properties from final state
         conv_manager.current_properties = final_state.get("current_properties", [])
         # Keep conversation history bounded
-        conv_manager.conversation_history = conv_manager.conversation_history[-
+        conv_manager.conversation_history = conv_manager.conversation_history[-12:] # Last 6 exchanges
 
     except Exception as e:
         error_msg = f"Error processing query: {str(e)}"
@@ -730,20 +734,30 @@ async def post_query(query: str):
     return {"response": response}
 
 
-@app.get("/setup")
-async def setup():
-    import os
-    from huggingface_hub import hf_hub_download
-    repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF"
-    filename = "qwen2.5-1.5b-instruct-q4_k_m.gguf"
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    model_path = hf_hub_download(
-        repo_id=repo_id,
-        filename=filename,
-        local_dir=script_dir,
-        local_dir_use_symlinks=False,
-    )
-    return model_path
 
 
+model_url = "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf"
+async def async_download():
+    import aiohttp
+    async with aiohttp.ClientSession() as session:
+        async with session.get(model_url) as response:
+            with open(model_path, "wb") as f:
+                while True:
+                    chunk = await response.content.read(1024)
+                    if not chunk:
+                        break
+                    f.write(chunk)
+
+@app.middleware("http")
+async def check_model_middleware(request: Request, call_next):
+    if not os.path.exists(model_path):
+        await async_download()
+        print("successfully downloaded")
+    else:
+        print("already downloaded")
+    return await call_next(request)
+
 
+@app.get("/")
+async def home():
+    return PlainTextResponse("Space is running. Model ready!")
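This middleware re-checks for the file on every request and, on a cold start, performs the full download inside the request path before the first response; it also assumes model_path is defined at module scope and that aiohttp is listed in requirements.txt. Since the Dockerfile now bakes the model into the image, the middleware acts as a fallback. If the per-request check is unwanted, the same guard can run once at boot instead; a sketch of that alternative (not in this commit), reusing the diff's own model_path and async_download:

    # Hypothetical alternative: download once when the app starts,
    # so request handling never pays the existence check.
    @app.on_event("startup")
    async def ensure_model():
        if not os.path.exists(model_path):
            await async_download()

Newer FastAPI versions prefer a lifespan handler over on_event, but the structure is the same.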
download.py CHANGED
@@ -8,7 +8,6 @@ model_path = hf_hub_download(
     repo_id=repo_id,
     filename=filename,
     local_dir=script_dir,
-    local_dir_use_symlinks=False, # optional: don't use symlinks
 )
 
 print(f"Model downloaded to: {model_path}")
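Dropping local_dir_use_symlinks tracks current huggingface_hub behavior: the parameter is deprecated, and local_dir downloads now place the real file in the target directory regardless. A quick post-run check (a sketch, reusing download.py's own script_dir and filename):

    # After `python download.py`, the GGUF should sit next to the script.
    import os
    assert os.path.isfile(os.path.join(script_dir, filename)), "download failed"
    print("model present:", filename)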