jedick committed
Commit · 27b6f54
1 Parent(s): fb36fb0

Download model before running workflow
app.py CHANGED

@@ -4,7 +4,7 @@ from graph import BuildGraph
 from retriever import db_dir
 from langgraph.checkpoint.memory import MemorySaver
 from dotenv import load_dotenv
-from main import openai_model, model_id
+from main import openai_model, model_id, DownloadChatModel
 from util import get_sources, get_start_end_months
 from mods.tool_calling_llm import extract_think
 import requests
@@ -82,7 +82,6 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
     if compute_mode == "local":
         gr.Info(
             f"Please wait for the local model to load",
-            duration=15,
             title=f"Model loading...",
         )
     # Get the chat model and build the graph
@@ -211,6 +210,11 @@ def to_workflow(request: gr.Request, *args):
     # Add session_hash to arguments
     new_args = args + (request.session_hash,)
     if compute_mode == "local":
+        # If graph hasn't been instantiated, download model before running workflow
+        graph = graph_instances[compute_mode].get(request.session_hash)
+        if graph is None:
+            DownloadChatModel()
+        # Call the workflow function with the @spaces.GPU decorator
         for value in run_workflow_local(*new_args):
             yield value
     if compute_mode == "remote":
@@ -218,7 +222,7 @@ def to_workflow(request: gr.Request, *args):
         yield value


-@spaces.GPU(duration=
+@spaces.GPU(duration=90)
 def run_workflow_local(*args):
     for value in run_workflow(*args):
         yield value
@@ -264,13 +268,11 @@ with gr.Blocks(
             "local",
             "remote",
         ],
-        value=("local" if torch.cuda.is_available() else "remote"),
+        # Default to remote because it provides a better first impression for most people
+        # value=("local" if torch.cuda.is_available() else "remote"),
+        value="remote",
         label="Compute Mode",
-        info=(
-            "NOTE: remote mode **does not** use ZeroGPU"
-            if torch.cuda.is_available()
-            else "NOTE: local mode requires GPU"
-        ),
+        info="NOTE: remote mode **does not** use ZeroGPU",
         render=False,
     )
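The point of the app.py change is to keep the model download out of the ZeroGPU budget: everything that runs inside the @spaces.GPU-decorated function counts toward its duration, so fetching weights there spends GPU time on network I/O. A minimal sketch of the pattern, with hypothetical names (ensure_model_cached, run_on_gpu) and assuming the spaces and huggingface_hub packages are installed:

import spaces
from huggingface_hub import snapshot_download

model_id = "example-org/example-chat-model"  # placeholder; the real id is defined in main.py

def ensure_model_cached():
    # Runs on CPU before the GPU-decorated function is entered, so the
    # download does not count against the @spaces.GPU duration budget.
    snapshot_download(model_id)

@spaces.GPU(duration=90)
def run_on_gpu(prompt):
    # The weights are already in the local Hugging Face cache, so loading
    # the model here is fast and the GPU seconds go to inference.
    ...

ensure_model_cached()
run_on_gpu("hello")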
main.py CHANGED

@@ -5,6 +5,7 @@ from langchain_core.output_parsers import StrOutputParser
 from langgraph.checkpoint.memory import MemorySaver
 from langchain_core.messages import ToolMessage
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from huggingface_hub import snapshot_download
 from datetime import datetime
 from dotenv import load_dotenv
 import os
@@ -128,6 +129,13 @@ def ProcessDirectory(path, compute_mode):
         print(f"Chroma: no change for {file_path}")


+def DownloadChatModel():
+    """
+    Downloads a chat model to the local Hugging Face cache.
+    """
+    snapshot_download(model_id)
+
+
 def GetChatModel(compute_mode):
     """
     Get a chat model.