Spaces: Running on Zero
jedick committed
Commit · 9d76733
1 Parent(s): 193aa8d
Revert model downloading
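Summary (as read from the diff below): app.py no longer checks for an existing graph and calls DownloadChatModel() before starting the local workflow, and main.py drops the DownloadChatModel() helper built on huggingface_hub.snapshot_download along with the local-directory fallback in GetChatModel(), so the tokenizer and model are loaded directly from model_id. The "Model loading..." notification in run_workflow() also gains a duration argument.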
app.py CHANGED
@@ -4,7 +4,7 @@ from graph import BuildGraph
 from retriever import db_dir
 from langgraph.checkpoint.memory import MemorySaver
 from dotenv import load_dotenv
-from main import openai_model, model_id, DownloadChatModel
+from main import openai_model, model_id
 from util import get_sources, get_start_end_months
 from mods.tool_calling_llm import extract_think
 import requests
@@ -82,6 +82,7 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
     if compute_mode == "local":
         gr.Info(
             f"Please wait for the local model to load",
+            duration=15,
             title=f"Model loading...",
         )
         # Get the chat model and build the graph
@@ -210,11 +211,6 @@ def to_workflow(request: gr.Request, *args):
     # Add session_hash to arguments
     new_args = args + (request.session_hash,)
     if compute_mode == "local":
-        # If graph hasn't been instantiated, download model before running workflow
-        graph = graph_instances[compute_mode].get(request.session_hash)
-        if graph is None:
-            gr.Info("Downloading model, please wait", title="Downloading model...")
-            DownloadChatModel()
         # Call the workflow function with the @spaces.GPU decorator
         for value in run_workflow_local(*new_args):
             yield value
@@ -658,9 +654,9 @@ with gr.Blocks(
         # For S3 (need AWS_ACCESS_KEY_ID and AWS_ACCESS_KEY_SECRET)
         download_file_from_bucket("r-help-chat", "db.zip", "db.zip")
        ## For Dropbox (shared file - key is in URL)
-        #shared_link = "https://www.dropbox.com/scl/fi/jx90g5lorpgkkyyzeurtc/db.zip?rlkey=wvqa3p9hdy4rmod1r8yf2am09&st=l9tsam56&dl=0"
-        #output_filename = "db.zip"
-        #download_dropbox_file(shared_link, output_filename)
+        # shared_link = "https://www.dropbox.com/scl/fi/jx90g5lorpgkkyyzeurtc/db.zip?rlkey=wvqa3p9hdy4rmod1r8yf2am09&st=l9tsam56&dl=0"
+        # output_filename = "db.zip"
+        # download_dropbox_file(shared_link, output_filename)
 
         return None
 
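For reference, a minimal sketch of how the added notification argument behaves, assuming a recent Gradio version where gr.Info() accepts duration (seconds the toast stays visible, default 10; None keeps it until dismissed) and title, and is called from inside an event handler. The handler name and layout here are illustrative, not part of the Space:

import gradio as gr

def on_run():
    # Keep the "Model loading..." toast visible for 15 seconds instead of the default 10;
    # duration=None would keep it on screen until the user closes it.
    gr.Info("Please wait for the local model to load", duration=15, title="Model loading...")
    return "Model loading started"

with gr.Blocks() as demo:
    btn = gr.Button("Run")
    out = gr.Textbox()
    btn.click(on_run, outputs=out)

demo.launch()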
main.py CHANGED
@@ -5,7 +5,6 @@ from langchain_core.output_parsers import StrOutputParser
 from langgraph.checkpoint.memory import MemorySaver
 from langchain_core.messages import ToolMessage
 from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-from huggingface_hub import snapshot_download
 from datetime import datetime
 from dotenv import load_dotenv
 import os
@@ -129,16 +128,6 @@ def ProcessDirectory(path, compute_mode):
            print(f"Chroma: no change for {file_path}")
 
 
-def DownloadChatModel():
-    """
-    Downloads a chat model to a local directory.
-    """
-    # Local directory is "./<repo_name>"
-    repo_name = model_id.split("/")[-1]
-    local_dir = f"./{repo_name}"
-    snapshot_download(model_id, local_dir=local_dir)
-
-
 def GetChatModel(compute_mode):
     """
     Get a chat model.
@@ -157,20 +146,11 @@ def GetChatModel(compute_mode):
     if compute_mode == "local" and not torch.cuda.is_available():
         raise Exception("Local chat model selected without GPU")
 
-    # Use local directory for model if it exists
-    repo_name = model_id.split("/")[-1]
-    local_dir = f"./{repo_name}"
-    if os.path.isdir(local_dir):
-        print("Using local directory for model")
-        id_or_dir = local_dir
-    else:
-        id_or_dir = model_id
-
     # Define the pipeline to pass to the HuggingFacePipeline class
     # https://huggingface.co/blog/langchain
-    tokenizer = AutoTokenizer.from_pretrained(id_or_dir)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(
-        id_or_dir,
+        model_id,
         # We need this to load the model in BF16 instead of fp32 (torch.float)
         torch_dtype=torch.bfloat16,
     )
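With the snapshot_download step removed, loading the local model relies on the standard Hub cache: the first from_pretrained(model_id) call downloads the weights (under ~/.cache/huggingface, or wherever HF_HOME points) and later calls reuse them. A minimal sketch of that pattern, with a placeholder model_id since the real one is defined in main.py:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Placeholder repo id for illustration; the Space sets its own model_id in main.py.
model_id = "some-org/some-chat-model"

# The first call downloads from the Hub into the local cache; subsequent calls reuse it,
# so no separate snapshot_download() step is needed.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,  # load weights in BF16 instead of fp32
)
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)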