jedick committed
Commit 27b6f54 · 1 parent: fb36fb0

Download model before running workflow

Files changed (2):
  1. app.py +11 -9
  2. main.py +8 -0
app.py CHANGED

@@ -4,7 +4,7 @@ from graph import BuildGraph
 from retriever import db_dir
 from langgraph.checkpoint.memory import MemorySaver
 from dotenv import load_dotenv
-from main import openai_model, model_id
+from main import openai_model, model_id, DownloadChatModel
 from util import get_sources, get_start_end_months
 from mods.tool_calling_llm import extract_think
 import requests

@@ -82,7 +82,6 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
     if compute_mode == "local":
         gr.Info(
             f"Please wait for the local model to load",
-            duration=15,
             title=f"Model loading...",
         )
     # Get the chat model and build the graph

@@ -211,6 +210,11 @@ def to_workflow(request: gr.Request, *args):
     # Add session_hash to arguments
     new_args = args + (request.session_hash,)
     if compute_mode == "local":
+        # If graph hasn't been instantiated, download model before running workflow
+        graph = graph_instances[compute_mode].get(request.session_hash)
+        if graph is None:
+            DownloadChatModel()
+        # Call the workflow function with the @spaces.GPU decorator
         for value in run_workflow_local(*new_args):
             yield value
     if compute_mode == "remote":

@@ -218,7 +222,7 @@ def to_workflow(request: gr.Request, *args):
         yield value


-@spaces.GPU(duration=120)
+@spaces.GPU(duration=90)
 def run_workflow_local(*args):
     for value in run_workflow(*args):
         yield value

@@ -264,13 +268,11 @@ with gr.Blocks(
             "local",
             "remote",
         ],
-        value=("local" if torch.cuda.is_available() else "remote"),
+        # Default to remote because it provides a better first impression for most people
+        # value=("local" if torch.cuda.is_available() else "remote"),
+        value="remote",
         label="Compute Mode",
-        info=(
-            "NOTE: remote mode **does not** use ZeroGPU"
-            if torch.cuda.is_available()
-            else "NOTE: local mode requires GPU"
-        ),
+        info="NOTE: remote mode **does not** use ZeroGPU",
         render=False,
     )

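The app.py change follows the usual ZeroGPU pattern: perform the network-bound model download before entering the @spaces.GPU-decorated function, so the timed GPU allocation (reduced here from 120 to 90 seconds) is spent loading and running the model rather than downloading weights. A minimal sketch of that split, with hypothetical names (MODEL_ID, handle_request, and run_local are illustrative, not the app's identifiers):

    import spaces
    from huggingface_hub import snapshot_download

    MODEL_ID = "org/model"  # hypothetical; app.py uses model_id imported from main.py

    def handle_request(*args):
        # CPU-side: fetch the weights into the local cache first.
        # snapshot_download returns almost immediately when already cached.
        snapshot_download(MODEL_ID)
        # Only now enter the timed ZeroGPU allocation.
        yield from run_local(*args)

    @spaces.GPU(duration=90)  # seconds of GPU time reserved per call
    def run_local(*args):
        # Load the already-cached model onto the GPU and generate.
        ...

Keeping the download out of the decorated function is also what lets the GPU duration be lowered without risking timeouts on a cold start.
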
main.py CHANGED

@@ -5,6 +5,7 @@ from langchain_core.output_parsers import StrOutputParser
 from langgraph.checkpoint.memory import MemorySaver
 from langchain_core.messages import ToolMessage
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from huggingface_hub import snapshot_download
 from datetime import datetime
 from dotenv import load_dotenv
 import os

@@ -128,6 +129,13 @@ def ProcessDirectory(path, compute_mode):
         print(f"Chroma: no change for {file_path}")


+def DownloadChatModel():
+    """
+    Downloads a chat model to the local Hugging Face cache.
+    """
+    snapshot_download(model_id)
+
+
 def GetChatModel(compute_mode):
     """
     Get a chat model.
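
For reference, snapshot_download pulls every file of a repo into the local Hugging Face cache (by default under ~/.cache/huggingface/hub) and returns the cached path; on later calls it is effectively a no-op when the cache is current, which is what makes calling DownloadChatModel ahead of the first workflow run inexpensive. A short usage sketch, with an illustrative repo id rather than the app's actual model_id:

    from huggingface_hub import snapshot_download

    # Illustrative repo id; main.py passes its own model_id.
    path = snapshot_download("HuggingFaceTB/SmolLM2-135M-Instruct")
    print(path)  # local cache directory containing the model files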