theosaurus committed
Commit · ed36d82
1 Parent(s): d06aee8

feat/

* Added debug print statements
* Replaced max token values
* Overhauled the thinking_prompt message
app.py
CHANGED
@@ -15,9 +15,8 @@ import traceback
 print(f"Is CUDA available: {torch.cuda.is_available()}")
 print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 
-
-DEFAULT_MAX_NEW_TOKENS = 0.65*
-MAX_INPUT_TOKEN_LENGTH = 100000
+MAX_NEW_TOKENS = 2**13
+DEFAULT_MAX_NEW_TOKENS = 0.65*MAX_NEW_TOKENS
 DEFAULT_SYSTEM_PROMPT = """
 Tu es un expert en extraction de données dans des documents très longs et bruités.
 Tu comprends le sujet grâce à des liens sémantiques que tu peux extraire.
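For reference, a quick standalone check of what the new constants evaluate to (computed for this note, not part of the commit) shows the default lands on a non-integer value:

# Standalone check of the values implied by the new constants (not from app.py)
MAX_NEW_TOKENS = 2**13
DEFAULT_MAX_NEW_TOKENS = 0.65 * MAX_NEW_TOKENS
print(MAX_NEW_TOKENS, DEFAULT_MAX_NEW_TOKENS)  # 8192 5324.8 -- the default is a float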
@@ -79,19 +78,6 @@ if torch.cuda.is_available():
         device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-# New helper function to create a thinking message
-def create_thinking_message(content: str, status: str = None) -> Dict[str, Any]:
-    """Creates a thinking message with metadata for display in the chatbot."""
-    return {
-        "role": "assistant",
-        "content": content,
-        "metadata": {
-            "title": "🧠 Réflexion",
-            "status": status
-        }
-    }
-
-
 # Helper function to generate responses from the LLM
 def generate_llm_response(
     conversation: List[Dict[str, str]],
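With create_thinking_message removed, the same metadata-tagged entry can be built inline wherever it is needed. A minimal sketch of that pattern, assuming a messages-format Gradio chatbot history (this mirrors the deleted helper rather than quoting code from the commit):

# Sketch: append a "thinking" entry whose title/status Gradio can show via message metadata
history = []  # the chatbot's messages list (illustrative)
history.append({
    "role": "assistant",
    "content": "",  # filled in as thinking tokens arrive
    "metadata": {"title": "🧠 Réflexion", "status": "pending"},
})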
@@ -143,7 +129,6 @@ def generate_llm_response(
 
     return "".join(outputs)
 
-
 def append_text_knowledge(file_path: str) -> str:
     """
     Reads content from a selected file and returns it as a string.
@@ -206,9 +191,9 @@ with gr.Blocks() as demo:
             max_tokens_slider = gr.Slider(
                 label="Max new tokens",
                 minimum=1,
-                maximum=
+                maximum=MAX_NEW_TOKENS,
                 step=1,
-                value=
+                value=DEFAULT_MAX_NEW_TOKENS
             )
             temperature_slider = gr.Slider(
                 label="Temperature",
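Since step=1 pairs here with the float default 5324.8, casting the default might be worth considering. A sketch of that adjustment, suggested in this note and not made by the commit (constants repeated so the snippet stands alone):

import gradio as gr

MAX_NEW_TOKENS = 2**13
DEFAULT_MAX_NEW_TOKENS = 0.65 * MAX_NEW_TOKENS

max_tokens_slider = gr.Slider(
    label="Max new tokens",
    minimum=1,
    maximum=MAX_NEW_TOKENS,
    step=1,
    value=int(DEFAULT_MAX_NEW_TOKENS),  # 5324 rather than 5324.8
)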
@@ -307,7 +292,7 @@ with gr.Blocks() as demo:
         for msg in history[:-2]:  # All msg except user message and thinking part
             thinking_conversation.append(msg)
 
-        thinking_prompt = user_message + "\n\nRéfléchis étape par étape.
+        thinking_prompt = user_message + "\n\nRéfléchis étape par étape. D'abord identifie l'intention de l'utilisateur. Quand tu as compris ce qui t'est demandé, commence à établir un plan clair et précis que tu peux suivre. Utilise l'italic et le gras en Markdown pour séquencer et prioriser tes actions."
         thinking_conversation.append({"role": "user", "content": thinking_prompt})
 
         # GENERATE THINKING
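For readers who don't read French, a rough English rendering of the new instruction appended to the user message (translation added for this note, not part of the commit): "Think step by step. First, identify the user's intent. Once you have understood what is being asked, start drawing up a clear and precise plan you can follow. Use Markdown italics and bold to sequence and prioritize your actions."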
@@ -330,11 +315,11 @@ with gr.Blocks() as demo:
             }
         }
         yield history
-
+        print("DEBUG:\t\tYielded history of ```thinking_result```")
+
         final_conversation = []
         if system_prompt:
             final_conversation.append({"role": "system", "content": system_prompt})
-
         if knowledge:
             final_conversation.append({
                 "role": "assistant",
@@ -347,7 +332,7 @@ with gr.Blocks() as demo:
 
         final_conversation.append({
             "role": "assistant",
-            "content": f"Voici mon analyse étape par étape:\n{thinking_result}\n\nMaintenant je vais formaliser le
+            "content": f"Voici mon analyse étape par étape:\n{thinking_result}\n\nMaintenant je vais formaliser le résultat final."
         })
         final_answer = generate_llm_response(
             final_conversation,
@@ -362,7 +347,8 @@ with gr.Blocks() as demo:
             "content": final_answer
         })
         yield history
-
+        print("DEBUG:\t\tYielded history of ```final_answer```")
+
     except Exception as e:
         error_traceback = traceback.format_exc()
         print(f"Error traceback:\n{error_traceback}")
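Taken together, these hunks touch a two-pass flow: a thinking pass whose output is fed back to the model as an assistant turn, then a final pass that produces the answer. A condensed sketch of that pattern, paraphrased from the context lines above (generate stands in for the app's generate_llm_response; the surrounding setup and streaming are simplified):

def two_pass_answer(user_message, thinking_conversation, final_conversation, generate):
    """Illustrative sketch of the two-pass pattern; not code from the commit."""
    # Pass 1: ask the model to reason step by step about the request
    thinking_conversation.append({"role": "user", "content": user_message + "\n\nRéfléchis étape par étape."})
    thinking_result = generate(thinking_conversation)

    # Pass 2: feed that reasoning back as an assistant turn, then request the final answer
    final_conversation.append({
        "role": "assistant",
        "content": f"Voici mon analyse étape par étape:\n{thinking_result}\n\n"
                   "Maintenant je vais formaliser le résultat final."
    })
    return generate(final_conversation)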