nananie143 committed (verified)
Commit 05661ec · 1 Parent(s): 627e1c6

Update app.py

Files changed (1): app.py (+32 -40)
app.py CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-from langchain_community.llms import LlamaCpp  # Updated import
+from langchain_community.llms import LlamaCpp
 import os
 import json
 import torch
@@ -15,6 +15,7 @@ import requests
 from pathlib import Path
 from tqdm import tqdm
 from contextlib import asynccontextmanager
+from huggingface_hub import hf_hub_download
 
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -27,33 +28,30 @@ class ChatCompletionRequest(BaseModel):
     max_tokens: Optional[int] = 2048
     stream: Optional[bool] = False
 
-def download_model(model_url: str, local_path: Path) -> Path:
-    """Download the model file if it doesn't exist locally."""
-    if local_path.exists():
-        logger.info(f"Model already exists at {local_path}")
-        return local_path
-
-    logger.info(f"Downloading model from {model_url}")
-    local_path.parent.mkdir(parents=True, exist_ok=True)
-
-    response = requests.get(model_url, stream=True)
-    total_size = int(response.headers.get('content-length', 0))
-
-    with open(local_path, 'wb') as file, tqdm(
-        desc=local_path.name,
-        total=total_size,
-        unit='iB',
-        unit_scale=True,
-        unit_divisor=1024,
-    ) as pbar:
-        for data in response.iter_content(chunk_size=1024):
-            size = file.write(data)
-            pbar.update(size)
-
-    return local_path
+def download_model_from_hf():
+    """Download the model file from Hugging Face."""
+    try:
+        logger.info("Downloading model from Hugging Face Hub...")
+
+        # Create models directory if it doesn't exist
+        model_dir = Path("models")
+        model_dir.mkdir(exist_ok=True)
+
+        # Download the model using huggingface_hub
+        local_path = hf_hub_download(
+            repo_id="G17c21ds/Qwen2.5-14B-Instruct-Uncensored-Q8_0-GGUF",
+            filename="model.gguf",
+            local_dir=model_dir,
+            local_dir_use_symlinks=False
+        )
+
+        return Path(local_path)
+    except Exception as e:
+        logger.error(f"Error downloading model: {str(e)}")
+        raise
 
 class QwenModel:
-    def __init__(self, model_path: str):
+    def __init__(self):
         """Initialize the Qwen model with automatic device detection."""
         try:
             # Check for GPU availability
@@ -61,12 +59,9 @@ class QwenModel:
             self.device_count = torch.cuda.device_count() if self.has_gpu else 0
             logger.info(f"GPU available: {self.has_gpu}, Device count: {self.device_count}")
 
-            # Ensure model path exists
-            model_path = Path(model_path)
-            if not model_path.exists():
-                # If model doesn't exist locally, download it
-                model_url = "https://huggingface.co/G17c21ds/Qwen2.5-14B-Instruct-Uncensored-Q8_0-GGUF/resolve/main/model.gguf"
-                model_path = download_model(model_url, model_path)
+            # Download or get the model
+            model_path = download_model_from_hf()
+            logger.info(f"Model path: {model_path}")
 
             # Configure model parameters based on available hardware
             n_gpu_layers = 40 if self.has_gpu else 0
@@ -76,13 +71,13 @@ class QwenModel:
                 model_path=str(model_path),
                 n_gpu_layers=n_gpu_layers,
                 n_ctx=4096,
-                n_batch=512 if self.has_gpu else 128,  # Reduced batch size for CPU
+                n_batch=512 if self.has_gpu else 128,
                 verbose=True,
                 temperature=0.7,
                 max_tokens=2048,
                 top_p=0.95,
                 top_k=50,
-                f16_kv=self.has_gpu,  # Only use f16 when GPU is available
+                f16_kv=self.has_gpu,
                 use_mlock=True,
                 use_mmap=True,
             )
@@ -107,8 +102,7 @@ async def lifespan(app: FastAPI):
     """Lifespan context manager for FastAPI startup and shutdown events."""
     global model
     try:
-        model_path = Path("models/qwen-2.5-14b-gguf")
-        model = QwenModel(model_path)
+        model = QwenModel()
         logger.info("Model initialized successfully")
         yield
     finally:
@@ -117,18 +111,16 @@ async def lifespan(app: FastAPI):
 
 app = FastAPI(lifespan=lifespan)
 
-# ... [rest of the FastAPI routes and main function remain the same] ...
+# ... [rest of the FastAPI routes remain the same] ...
 
 def main():
     """Main function to initialize and launch the application."""
    try:
         global model
-        # Model path
-        model_path = Path("models/qwen-2.5-14b-gguf")
 
         # Initialize the model if not already initialized
         if model is None:
-            model = QwenModel(model_path)
+            model = QwenModel()
 
         # Create and launch the Gradio interface
         interface = create_gradio_interface(model)
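
A note on the behavioral change: the removed download_model skipped the network fetch only through its explicit local_path.exists() check, whereas hf_hub_download consults the local copy and its download metadata itself, so repeated QwenModel() initializations will not re-fetch the multi-gigabyte GGUF. A minimal sketch of that reuse, using the repo_id and filename from the diff (everything else is illustrative; note that local_dir_use_symlinks is deprecated in recent huggingface_hub releases, which write real files into local_dir by default):

    from pathlib import Path
    from huggingface_hub import hf_hub_download

    REPO_ID = "G17c21ds/Qwen2.5-14B-Instruct-Uncensored-Q8_0-GGUF"
    FILENAME = "model.gguf"

    # First call downloads into models/ (or reuses an earlier copy).
    path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, local_dir="models")

    # local_files_only=True resolves purely from disk, with no network request;
    # it raises if the file was never downloaded, making it a cheap way to
    # confirm the first download is being reused rather than repeated.
    cached = hf_hub_download(repo_id=REPO_ID, filename=FILENAME,
                             local_dir="models", local_files_only=True)
    assert Path(path) == Path(cached)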