Spaces · Running on Zero

jedick committed · 9d0646a · 1 Parent(s): 951d2c0
Change model to Qwen3-14B
app.py CHANGED

@@ -42,7 +42,7 @@ def run_workflow(input, history, compute_mode, thread_id, session_hash):
     if compute_mode == "local":
         if not torch.cuda.is_available():
             raise gr.Error(
-                "Local mode requires GPU.
+                "Local mode requires GPU.",
                 print_exception=False,
             )
 
@@ -244,7 +244,11 @@ with gr.Blocks(
         ],
         value=("local" if torch.cuda.is_available() else "remote"),
         label="Compute Mode",
-        info=(
+        info=(
+            "NOTE: remote mode is available even if you have exceeded your ZeroGPU quota"
+            if torch.cuda.is_available()
+            else "NOTE: local mode requires GPU"
+        ),
         render=False,
     )
 
@@ -355,8 +359,8 @@ with gr.Blocks(
     if compute_mode == "local":
         status_text = f"""
         π Now in **local** mode, using ZeroGPU hardware<br>
-        β Response time is around
-        ✨ [
+        β Response time is around 1 minute<br>
+        ✨ [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
         π See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
         """
         return status_text
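
The second hunk makes the Compute Mode radio's help text conditional on GPU availability, evaluated once at startup. Below is a minimal runnable sketch of the same pattern; the bare gr.Blocks layout is illustrative, not the app's actual UI, while the option list and strings are copied from the diff:

import torch
import gradio as gr

with gr.Blocks() as demo:
    compute_mode = gr.Radio(
        ["local", "remote"],
        # Default to local only when a GPU is actually present
        value=("local" if torch.cuda.is_available() else "remote"),
        label="Compute Mode",
        # Help text chosen once at startup, matching the hunk above
        info=(
            "NOTE: remote mode is available even if you have exceeded your ZeroGPU quota"
            if torch.cuda.is_available()
            else "NOTE: local mode requires GPU"
        ),
    )

demo.launch()

The first hunk applies the same guard at request time: selecting local mode without a GPU raises gr.Error with print_exception=False, so the message reaches the UI without dumping a traceback to the logs.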
graph.py CHANGED

@@ -9,7 +9,7 @@ import os
 
 # Local modules
 from retriever import BuildRetriever
-from prompts import query_prompt, generate_prompt,
+from prompts import query_prompt, generate_prompt, generic_tools_template
 from mods.tool_calling_llm import ToolCallingLLM
 
 # Local modules
@@ -81,7 +81,7 @@ def ToolifyHF(chat_model, system_message, system_message_suffix="", think=False)
     # system_message = "/no_think\n" + system_message
 
     # Combine system prompt and tools template
-    tool_system_prompt_template = system_message +
+    tool_system_prompt_template = system_message + generic_tools_template
 
     class HuggingFaceWithTools(ToolCallingLLM, ChatHuggingFace):
 
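
With the template renamed to generic_tools_template, any system message can be suffixed with the shared tool-calling instructions before the chat model is wrapped. A sketch of just that concatenation step; the template body and the example message here are stand-ins, since the real template lives in prompts.py:

# Stand-in for the real generic_tools_template defined in prompts.py
generic_tools_template = """

### Functions

(function signatures would be listed here)
"""

def build_tool_system_prompt(system_message: str) -> str:
    # Same concatenation as in ToolifyHF above
    return system_message + generic_tools_template

print(build_tool_system_prompt("You answer questions about the R-help mailing list."))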
main.py CHANGED

@@ -40,7 +40,8 @@ openai_model = "gpt-4o-mini"
 model_id = os.getenv("MODEL_ID")
 if model_id is None:
     # model_id = "HuggingFaceTB/SmolLM3-3B"
-    model_id = "google/gemma-3-12b-it"
+    # model_id = "google/gemma-3-12b-it"
+    model_id = "Qwen/Qwen3-14B"
 
 # Suppress these messages:
 # INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
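
The default model changes to Qwen/Qwen3-14B, but the MODEL_ID environment variable still takes precedence, so a different model can be selected without editing the code. A self-contained sketch of the lookup:

import os

# MODEL_ID wins when set; otherwise fall back to the new default
model_id = os.getenv("MODEL_ID")
if model_id is None:
    model_id = "Qwen/Qwen3-14B"

print(f"Using model: {model_id}")

# e.g. to try another model without editing main.py:
#   MODEL_ID=HuggingFaceTB/SmolLM3-3B python main.py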
prompts.py CHANGED

@@ -84,9 +84,9 @@ You must always select one of the above tools and respond with only a JSON objec
 
 """
 
-# Prompt template for Gemma
+# Prompt template for Gemma/Qwen with tools
 # Based on https://ai.google.dev/gemma/docs/capabilities/function-calling
-
+generic_tools_template = """
 
 ### Functions
 
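
The hunk's context line shows that the template instructs the model to respond with only a JSON object naming the selected tool. A hedged sketch of how such a reply might be parsed; the actual schema is not visible in this diff, so the tool name and the "name"/"parameters" keys below are assumptions:

import json

# Hypothetical model reply; the real schema is set by the template in prompts.py
raw = '{"name": "retrieve_emails", "parameters": {"query": "lapply vs sapply"}}'

call = json.loads(raw)
tool_name = call["name"]        # which function the model selected (assumed key)
tool_args = call["parameters"]  # arguments for that function (assumed key)
print(tool_name, tool_args)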