theosaurus committed
Commit · ed36d82
1 Parent(s): d06aee8

feat/

* Added debug print statements
* Replaced max token values
* Overhauled the thinking_prompt message
app.py
CHANGED
@@ -15,9 +15,8 @@ import traceback
 print(f"Is CUDA available: {torch.cuda.is_available()}")
 print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 
-
-DEFAULT_MAX_NEW_TOKENS = 0.65*
-MAX_INPUT_TOKEN_LENGTH = 100000
+MAX_NEW_TOKENS = 2**13
+DEFAULT_MAX_NEW_TOKENS = 0.65*MAX_NEW_TOKENS
 DEFAULT_SYSTEM_PROMPT = """
 Tu es un expert en extraction de données dans des documents très longs et bruités.
 Tu comprends le sujet grâce à des liens sémantiques que tu peux extraire.
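For reference, a quick standalone check of what the new constants evaluate to (computed for this note, not part of the commit) shows the default lands on a non-integer value:

# Standalone check of the values implied by the new constants (not from app.py)
MAX_NEW_TOKENS = 2**13
DEFAULT_MAX_NEW_TOKENS = 0.65 * MAX_NEW_TOKENS
print(MAX_NEW_TOKENS, DEFAULT_MAX_NEW_TOKENS)  # 8192 5324.8 -- the default is a float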
@@ -79,19 +78,6 @@ if torch.cuda.is_available():
         device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-# New helper function to create a thinking message
-def create_thinking_message(content: str, status: str = None) -> Dict[str, Any]:
-    """Creates a thinking message with metadata for display in the chatbot."""
-    return {
-        "role": "assistant",
-        "content": content,
-        "metadata": {
-            "title": "🧠 Réflexion",
-            "status": status
-        }
-    }
-
-
 # Helper function to generate responses from the LLM
 def generate_llm_response(
     conversation: List[Dict[str, str]],
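With create_thinking_message removed, the same metadata-tagged entry can be built inline wherever it is needed. A minimal sketch of that pattern, assuming a messages-format Gradio chatbot history (this mirrors the deleted helper rather than quoting code from the commit):

# Sketch: append a "thinking" entry whose title/status Gradio can show via message metadata
history = []  # the chatbot's messages list (illustrative)
history.append({
    "role": "assistant",
    "content": "",  # filled in as thinking tokens arrive
    "metadata": {"title": "🧠 Réflexion", "status": "pending"},
})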
@@ -143,7 +129,6 @@ def generate_llm_response(
 
     return "".join(outputs)
 
-
 def append_text_knowledge(file_path: str) -> str:
     """
     Reads content from a selected file and returns it as a string.
@@ -206,9 +191,9 @@ with gr.Blocks() as demo:
             max_tokens_slider = gr.Slider(
                 label="Max new tokens",
                 minimum=1,
-                maximum=
+                maximum=MAX_NEW_TOKENS,
                 step=1,
-                value=
+                value=DEFAULT_MAX_NEW_TOKENS
             )
             temperature_slider = gr.Slider(
                 label="Temperature",
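Since step=1 pairs here with the float default 5324.8, casting the default might be worth considering. A sketch of that adjustment, suggested in this note and not made by the commit (constants repeated so the snippet stands alone):

import gradio as gr

MAX_NEW_TOKENS = 2**13
DEFAULT_MAX_NEW_TOKENS = 0.65 * MAX_NEW_TOKENS

max_tokens_slider = gr.Slider(
    label="Max new tokens",
    minimum=1,
    maximum=MAX_NEW_TOKENS,
    step=1,
    value=int(DEFAULT_MAX_NEW_TOKENS),  # 5324 rather than 5324.8
)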
@@ -307,7 +292,7 @@ with gr.Blocks() as demo:
         for msg in history[:-2]:  # All msg except user message and thinking part
             thinking_conversation.append(msg)
 
-        thinking_prompt = user_message + "\n\nRéfléchis étape par étape.
+        thinking_prompt = user_message + "\n\nRéfléchis étape par étape. D'abord identifie l'intention de l'utilisateur. Quand tu as compris ce qui t'est demandé, commence à établir un plan clair et précis que tu peux suivre. Utilise l'italic et le gras en Markdown pour séquencer et prioriser tes actions."
         thinking_conversation.append({"role": "user", "content": thinking_prompt})
 
         # GENERATE THINKING
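For readers who don't read French, a rough English rendering of the new instruction appended to the user message (translation added for this note, not part of the commit): "Think step by step. First, identify the user's intent. Once you have understood what is being asked, start drawing up a clear and precise plan you can follow. Use Markdown italics and bold to sequence and prioritize your actions."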
@@ -330,11 +315,11 @@ with gr.Blocks() as demo:
             }
         }
         yield history
-
+        print("DEBUG:\t\tYielded history of ```thinking_result```")
+
         final_conversation = []
         if system_prompt:
             final_conversation.append({"role": "system", "content": system_prompt})
-
         if knowledge:
             final_conversation.append({
                 "role": "assistant",
@@ -347,7 +332,7 @@ with gr.Blocks() as demo:
 
         final_conversation.append({
             "role": "assistant",
-            "content": f"Voici mon analyse étape par étape:\n{thinking_result}\n\nMaintenant je vais formaliser le
+            "content": f"Voici mon analyse étape par étape:\n{thinking_result}\n\nMaintenant je vais formaliser le résultat final."
         })
         final_answer = generate_llm_response(
             final_conversation,
@@ -362,7 +347,8 @@ with gr.Blocks() as demo:
             "content": final_answer
         })
         yield history
-
+        print("DEBUG:\t\tYielded history of ```final_answer```")
+
     except Exception as e:
         error_traceback = traceback.format_exc()
         print(f"Error traceback:\n{error_traceback}")
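Taken together, these hunks touch a two-pass flow: a thinking pass whose output is fed back to the model as an assistant turn, then a final pass that produces the answer. A condensed sketch of that pattern, paraphrased from the context lines above (generate stands in for the app's generate_llm_response; the surrounding setup and streaming are simplified):

def two_pass_answer(user_message, thinking_conversation, final_conversation, generate):
    """Illustrative sketch of the two-pass pattern; not code from the commit."""
    # Pass 1: ask the model to reason step by step about the request
    thinking_conversation.append({"role": "user", "content": user_message + "\n\nRéfléchis étape par étape."})
    thinking_result = generate(thinking_conversation)

    # Pass 2: feed that reasoning back as an assistant turn, then request the final answer
    final_conversation.append({
        "role": "assistant",
        "content": f"Voici mon analyse étape par étape:\n{thinking_result}\n\n"
                   "Maintenant je vais formaliser le résultat final."
    })
    return generate(final_conversation)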