Spaces: Running on Zero

jedick committed · 555a40e
Parent(s): 1b28bd4

Change chat model to google/gemma-3-12b-it
app.py CHANGED

@@ -416,8 +416,6 @@ with gr.Blocks(
                 status_text = f"""
                 π Now in **local** mode, using ZeroGPU hardware<br>
                 β Response time is about one minute<br>
-                π§ Add **/think** to enable thinking</br>
-                π’ Increases ZeroGPU allotment to 100 seconds</br>
                 β¨ [{embedding_model_id.split("/")[-1]}](https://huggingface.co/{embedding_model_id}) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
                 π See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
                 """

@@ -461,7 +459,7 @@ with gr.Blocks(
     def get_multi_tool_questions(compute_mode, as_dataset=True):
         """Get multi-tool example questions based on compute mode"""
         questions = [
-            "Differences between lapply and for loops
+            "Differences between lapply and for loops",
             "Discuss pipe operator usage in 2022, 2023, and 2024",
         ]

@@ -474,7 +472,7 @@ with gr.Blocks(
         """Get multi-turn example questions based on compute mode"""
         questions = [
             "Lookup emails that reference bugs.r-project.org in 2025",
-            "Did those authors report bugs before 2025?
+            "Did those authors report bugs before 2025?",
         ]

         if compute_mode == "remote":

@@ -610,18 +608,6 @@ with gr.Blocks(
         [compute_mode],
         [status],
         api_name=False,
-    ).then(
-        # Update multi-tool examples based on compute mode
-        get_multi_tool_questions,
-        [compute_mode],
-        [multi_tool_questions.dataset],
-        api_name=False,
-    ).then(
-        # Update multi-turn examples based on compute mode
-        get_multi_turn_questions,
-        [compute_mode],
-        [multi_turn_questions.dataset],
-        api_name=False,
     )

     input.submit(
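Note on the last app.py hunk: the removed .then() chain refreshed the multi-tool and multi-turn example Datasets whenever the compute mode changed; after this commit only the status update remains. Below is a minimal, illustrative sketch of that Gradio wiring pattern. Component and function names follow app.py, but the bodies and layout are simplified assumptions, not the app's actual code.

import gradio as gr

def get_status(compute_mode):
    # Placeholder for the status_text logic in app.py
    return f"Now in **{compute_mode}** mode"

def get_multi_tool_questions(compute_mode, as_dataset=True):
    # Before this commit, the sample questions differed by compute mode
    questions = [
        ["Differences between lapply and for loops"],
        ["Discuss pipe operator usage in 2022, 2023, and 2024"],
    ]
    return gr.Dataset(samples=questions) if as_dataset else questions

with gr.Blocks() as demo:
    compute_mode = gr.Radio(["remote", "local"], value="remote", label="Compute mode")
    status = gr.Markdown()
    input = gr.Textbox(label="Question")
    multi_tool_questions = gr.Examples(
        examples=[["placeholder"]], inputs=[input], label="Multi-tool questions"
    )
    # This sketch reproduces the pre-commit wiring: a compute_mode change updates
    # the status Markdown, then a chained .then() refreshes the example Dataset.
    # After the commit, only the .change() step is kept in app.py.
    compute_mode.change(
        get_status, [compute_mode], [status], api_name=False
    ).then(
        get_multi_tool_questions, [compute_mode], [multi_tool_questions.dataset], api_name=False
    )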
graph.py CHANGED

@@ -65,10 +65,11 @@ def normalize_messages(messages):
     tool_messages = []
     count = 1
     while i < len(messages) and type(messages[i]) is ToolMessage:
-
-
-
-
+        tool_msg = messages[i].content.replace(
+            "### Retrieved Emails:",
+            f"### Retrieved Emails from Tool Call {count}:",
+        )
+        tool_messages.append(tool_msg)
         count += 1
         i += 1
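The added lines in graph.py relabel each consecutive ToolMessage so the chat model can tell the results of multiple retrieval calls apart. A self-contained sketch of just that relabeling step (the surrounding normalize_messages logic is omitted):

from langchain_core.messages import ToolMessage

def label_tool_messages(messages, i=0):
    # Rewrite the "### Retrieved Emails:" header of each consecutive ToolMessage
    # to include its tool-call number, as in the new graph.py code.
    tool_messages = []
    count = 1
    while i < len(messages) and type(messages[i]) is ToolMessage:
        tool_msg = messages[i].content.replace(
            "### Retrieved Emails:",
            f"### Retrieved Emails from Tool Call {count}:",
        )
        tool_messages.append(tool_msg)
        count += 1
        i += 1
    return tool_messages

msgs = [
    ToolMessage(content="### Retrieved Emails:\nemail A", tool_call_id="1"),
    ToolMessage(content="### Retrieved Emails:\nemail B", tool_call_id="2"),
]
print(label_tool_messages(msgs))
# ['### Retrieved Emails from Tool Call 1:\nemail A',
#  '### Retrieved Emails from Tool Call 2:\nemail B']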
main.py CHANGED

@@ -40,8 +40,8 @@ openai_model = "gpt-4o-mini"
 model_id = os.getenv("MODEL_ID")
 if model_id is None:
     # model_id = "HuggingFaceTB/SmolLM3-3B"
-
-    model_id = "Qwen/Qwen3-14B"
+    model_id = "google/gemma-3-12b-it"
+    # model_id = "Qwen/Qwen3-14B"

 # Suppress these messages:
 # INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
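The main.py change only swaps the default local chat model; the MODEL_ID environment variable still takes precedence when set. A compact sketch of the fallback, plus an illustrative override command (the exact launch command is an assumption, not taken from the repo):

import os

# MODEL_ID from the environment wins; otherwise fall back to the new default.
model_id = os.getenv("MODEL_ID", "google/gemma-3-12b-it")
print(f"Using chat model: {model_id}")

# Example override before launching (illustrative only):
#   MODEL_ID="Qwen/Qwen3-14B" python app.py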
prompts.py CHANGED

@@ -36,8 +36,8 @@ def query_prompt(chat_model, think=False):
     "For general summaries, use retrieve_emails(search_query='R'). "
     "For questions about years, use retrieve_emails(search_query=<query>, start_year=, end_year=) (this month is this year). "
     "For questions about months, use 3-letter abbreviations (Jan...Dec) for the 'month' argument. "
-    "
-    "You should always retrieve more emails based on
+    "Use all previous messages as context to formulate your search query. " # Gemma
+    "You should always retrieve more emails based on context and the most recent question. " # Qwen
     # "Even if retrieved emails are available, you should retrieve more emails to answer the most recent question. " # Qwen
     # "You must perform the search yourself. Do not tell the user how to retrieve emails. " # Qwen
     # "Do not use your memory or knowledge to answer the user's question. Only retrieve emails based on the user's question. " # Qwen