pathakDev10 committed
Commit dd1baa0 · 1 Parent(s): 0d57a92
Files changed (5):
  1. .gitignore +2 -1
  2. Dockerfile +10 -1
  3. __pycache__/app.cpython-312.pyc +0 -0
  4. app.py +32 -18
  5. download.py +0 -1
.gitignore CHANGED
@@ -1,2 +1,3 @@
 qwen2.5-1.5b-instruct-q4_k_m.gguf
-qwen2.5-1.5b-instruct-q5_k_m.gguf
+qwen2.5-1.5b-instruct-q5_k_m.gguf
+*.gguf
Dockerfile CHANGED
@@ -10,6 +10,15 @@ WORKDIR /app
 COPY --chown=user ./requirements.txt requirements.txt
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
 
-RUN wget https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf -O qwen2.5-1.5b-instruct-q4_k_m.gguf
+RUN python -c "\
+from huggingface_hub import hf_hub_download; \
+hf_hub_download(\
+repo_id='Qwen/Qwen2.5-1.5B-Instruct-GGUF', \
+filename='qwen2.5-1.5b-instruct-q4_k_m.gguf', \
+local_dir='.', \
+local_dir_use_symlinks=False, \
+token='$HF_TOKEN'\
+)"
+
 COPY --chown=user . /app
 CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
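The replacement RUN line works because Docker folds the backslash-continued lines into one shell command and the shell expands $HF_TOKEN inside the double quotes, but that makes it fragile: an exec-form RUN would skip the expansion, and an unset HF_TOKEN becomes a literal empty token string. A minimal sketch of the same step as a standalone script the Dockerfile could COPY and RUN instead (the filename fetch_model.py is an assumption, not part of this commit):

# fetch_model.py: hypothetical standalone version of the RUN python -c one-liner
import os
from huggingface_hub import hf_hub_download

hf_hub_download(
    repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
    filename="qwen2.5-1.5b-instruct-q4_k_m.gguf",
    local_dir=".",
    # local_dir_use_symlinks is deprecated in recent huggingface_hub releases
    # and is omitted here, as this commit already does in download.py.
    token=os.environ.get("HF_TOKEN") or None,  # None = anonymous; the Qwen GGUF repo is public
)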
__pycache__/app.cpython-312.pyc CHANGED
Binary files a/__pycache__/app.cpython-312.pyc and b/__pycache__/app.cpython-312.pyc differ
 
app.py CHANGED
@@ -20,6 +20,10 @@ from langchain_core.tools import tool
 from langchain_core.callbacks import StreamingStdOutCallbackHandler, CallbackManager
 from langchain_core.callbacks.base import BaseCallbackHandler
 
+import os
+from fastapi.responses import PlainTextResponse
+from fastapi import FastAPI, Request
+from fastapi.staticfiles import StaticFiles
 # ------------------------ Model Inference Wrapper ------------------------
 
 class ChatQwen:
@@ -57,7 +61,7 @@ class ChatQwen:
             model_path=model_path,
             temperature=self.temperature,
             # n_ctx=512,
-            n_ctx=2048,
+            n_ctx=8192,
             n_threads=4,  # Adjust as needed
             batch_size=512,
             verbose=False,
@@ -406,8 +410,8 @@ def generate_response(state: dict) -> dict:
         messages.append({"role": "system", "content": "When responding, use only the provided property details."})
 
     # Add conversation history
-    # Truncate conversation history (last 2 exchanges)
-    truncated_history = state.get("messages", [])[-4:]  # Last 2 user+assistant pairs
+    # Truncate conversation history (last 4 exchanges)
+    truncated_history = state.get("messages", [])[-8:]  # Last 4 user+assistant pairs
     for msg in truncated_history:
         messages.append({"role": msg["role"], "content": msg["content"]})
 
@@ -689,7 +693,7 @@ def stream_query(query: str, connection_id: str, loop):
         # Always update current_properties from final state
         conv_manager.current_properties = final_state.get("current_properties", [])
         # Keep conversation history bounded
-        conv_manager.conversation_history = conv_manager.conversation_history[-6:]  # Last 3 exchanges
+        conv_manager.conversation_history = conv_manager.conversation_history[-12:]  # Last 6 exchanges
 
     except Exception as e:
         error_msg = f"Error processing query: {str(e)}"
@@ -730,20 +734,30 @@ async def post_query(query: str):
     return {"response": response}
 
 
-@app.get("/setup")
-async def setup():
-    import os
-    from huggingface_hub import hf_hub_download
-    repo_id = "Qwen/Qwen2.5-1.5B-Instruct-GGUF"
-    filename = "qwen2.5-1.5b-instruct-q4_k_m.gguf"
-    script_dir = os.path.dirname(os.path.abspath(__file__))
-    model_path = hf_hub_download(
-        repo_id=repo_id,
-        filename=filename,
-        local_dir=script_dir,
-        local_dir_use_symlinks=False,
-    )
-    return model_path
+model_url = "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF/resolve/main/qwen2.5-1.5b-instruct-q4_k_m.gguf"
+async def async_download():
+    import aiohttp
+    async with aiohttp.ClientSession() as session:
+        async with session.get(model_url) as response:
+            with open(model_path, "wb") as f:
+                while True:
+                    chunk = await response.content.read(1024)
+                    if not chunk:
+                        break
+                    f.write(chunk)
+
+@app.middleware("http")
+async def check_model_middleware(request: Request, call_next):
+    if not os.path.exists(model_path):
+        await async_download()
+        print("successfully downloaded")
+    else:
+        print("already downloaded")
+    return await call_next(request)
+
+@app.get("/")
+async def home():
+    return PlainTextResponse("Space is running. Model ready!")
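One caveat in the new check_model_middleware: if several requests arrive before the first download finishes, each sees a missing file and starts its own download into model_path, and a request served mid-download can hit a truncated file. A minimal guarded sketch, assuming the app, model_path, and async_download objects from app.py above:

import asyncio
import os

_download_lock = asyncio.Lock()  # hypothetical module-level lock, not in the commit

@app.middleware("http")
async def check_model_middleware(request: Request, call_next):
    if not os.path.exists(model_path):
        async with _download_lock:
            # Re-check after acquiring the lock: another request may have
            # completed the download while this one was waiting.
            if not os.path.exists(model_path):
                await async_download()
    return await call_next(request)

Writing to a temporary path and os.replace()-ing it into model_path would additionally keep half-written files from ever being visible, and a chunk size larger than 1024 bytes would speed up the roughly 1 GB transfer.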
download.py CHANGED
@@ -8,7 +8,6 @@ model_path = hf_hub_download(
     repo_id=repo_id,
     filename=filename,
     local_dir=script_dir,
-    local_dir_use_symlinks=False,  # optional: don't use symlinks
 )
 
 print(f"Model downloaded to: {model_path}")
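Dropping local_dir_use_symlinks matches current huggingface_hub behavior: the parameter is deprecated in recent releases (local_dir downloads are written as plain files now), so passing it only triggers a deprecation warning. For reference, a sketch of the whole script after this change; the lines outside the hunk are assumptions reconstructed from the removed /setup endpoint in app.py, which used the same values:

# download.py after this commit; lines outside the hunk are reconstructed, not shown in the diff
import os
from huggingface_hub import hf_hub_download

repo_id = "Qwen/Qwen2.5-1.5B-Instruct-GGUF"
filename = "qwen2.5-1.5b-instruct-q4_k_m.gguf"
script_dir = os.path.dirname(os.path.abspath(__file__))

model_path = hf_hub_download(
    repo_id=repo_id,
    filename=filename,
    local_dir=script_dir,  # download next to the script
)

print(f"Model downloaded to: {model_path}")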