Somekindofa committed
Commit · f7f7d8c
1 Parent(s): 23caf6d
Feat/ Implemented CoT
app.py CHANGED

@@ -5,7 +5,7 @@ import accelerate
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextIteratorStreamer
 import os
 import torch
-from typing import Optional, Iterator
+from typing import Optional, Iterator, Dict, Any, List
 from threading import Thread
 from types import NoneType

@@ -827,6 +827,18 @@ if torch.cuda.is_available():
                                                  device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)

+# New helper function to create a thinking message
+def create_thinking_message(content: str, status: str = None) -> Dict[str, Any]:
+    """Creates a thinking message with metadata for display in the chatbot."""
+    return {
+        "role": "assistant",
+        "content": content,
+        "metadata": {
+            "title": "🧠 Réflexion",
+            "status": status
+        }
+    }
+
 @spaces.GPU
 def generate(
     message: str,

@@ -834,57 +846,120 @@ def generate(
     knowledge: str, # added knowledge parameter
     system_prompt: str = DEFAULT_SYSTEM_PROMPT,
     max_new_tokens: int = 1024,
-    temperature: float = 0.
-    top_p: float = 0.
+    temperature: float = 0.2,
+    top_p: float = 0.8,
     top_k: int = 50,
-    repetition_penalty: float = 1.
-) -> Iterator[str]:
+    repetition_penalty: float = 1.0
+) -> Iterator[Dict[str, Any]]:
     try:
-        [...]
+        thinking_conversation = []
         if system_prompt:
-            [...]
+            thinking_conversation.append({"role": "system",
+                                          "content": system_prompt})
         if knowledge:
-            [...]
+            thinking_conversation.append({"role": "assistant",
+                                          "content": f"Voici l'ontologie existante que je dois comprendre: {knowledge}\n\nJe vais l'analyser étape par étape."})
+
+        thinking_conversation += chat_history
+        # Thinking prompt
+        thinking_prompt = message + "\n\nRéfléchis étape par étape. Identifie d'abord les entités, puis les relations, puis organise hiérarchiquement avant de formaliser."
+
+        thinking_conversation.append({"role": "user",
+                                      "content": thinking_prompt})
+        thinking_message = create_thinking_message(content="Réflexion en cours...",
+                                                   status="pending")
+        yield thinking_message
+
+        thinking_result = generate_llm_response(
+            thinking_conversation,
+            max_new_tokens=max_new_tokens * 2,
+            temperature=temperature,
+            top_p=top_p,
+            top_k=top_k,
+            repetition_penalty=repetition_penalty
+        )

+        thinking_message = create_thinking_message(thinking_result, status="done")
+        yield thinking_message

-        [...]
+        # Final Answer
+        final_conversation = []
+        final_conversation.append({"role": "system", "content": system_prompt})
+        if knowledge:
+            final_conversation.append({"role": "assistant", "content": f"J'ai analysé ce texte: {knowledge}"})
+        final_conversation += chat_history

-        [...]
-            streamer=streamer,
+        final_answer = generate_llm_response(
+            final_conversation,
             max_new_tokens=max_new_tokens,
-            [...]
+            temperature=temperature * 0.8, # Even lower temperature for final answer
             top_p=top_p,
             top_k=top_k,
-            [...]
-            num_beams=1,
-            repetition_penalty=repetition_penalty,
-            pad_token_id=tokenizer.eos_token_id,
+            repetition_penalty=repetition_penalty
         )

-        [...]
-        for text in streamer:
-            outputs.append(text)
-            yield "".join(outputs)
-        [...]
+        # Yield the final answer
+        yield {
+            "role": "assistant",
+            "content": final_answer
+        }
     except Exception as e:
-        yield
+        yield {
+            "role": "assistant",
+            "content": f"An error occurred: {str(e)}"
+        }
+
+# Helper function to generate responses from the LLM
+def generate_llm_response(
+    conversation: List[Dict[str, str]],
+    max_new_tokens: int,
+    temperature: float,
+    top_p: float,
+    top_k: int,
+    repetition_penalty: float
+) -> str:
+    """Generate a response from the LLM based on the conversation."""
+    input_ids = tokenizer.apply_chat_template(
+        conversation,
+        return_tensors="pt",
+        add_generation_prompt=True
+    )
+
+    input_ids = input_ids.to(model.device)
+
+    streamer = TextIteratorStreamer(
+        tokenizer,
+        timeout=2*60.0,
+        skip_prompt=True,
+        skip_special_tokens=True
+    )
+
+    generate_kwargs = dict(
+        {"input_ids": input_ids},
+        streamer=streamer,
+        max_new_tokens=max_new_tokens,
+        do_sample=True,
+        top_p=top_p,
+        top_k=top_k,
+        temperature=temperature,
+        num_beams=1,
+        repetition_penalty=repetition_penalty,
+        pad_token_id=tokenizer.eos_token_id,
+    )
+
+    t = Thread(
+        target=model.generate,
+        kwargs=generate_kwargs
+    )
+    t.start()
+
+    # Collect the output
+    outputs = []
+    for text in streamer:
+        outputs.append(text)
+
+    return "".join(outputs)
+

 def append_text_knowledge(file_path: str) -> str:
     """

@@ -910,6 +985,7 @@ knowledge_textbox = gr.Textbox(
     lines= 20,
     visible=False
 )
+
 chat_interface = gr.ChatInterface(
     fn=generate,
     type="messages",

@@ -929,14 +1005,14 @@ chat_interface = gr.ChatInterface(
             minimum=0.1,
             maximum=4.0,
             step=0.1,
-            value=0.
+            value=0.2,
         ),
         gr.Slider(
             label="Top-p (nucleus sampling)",
             minimum=0.05,
             maximum=1.0,
             step=0.05,
-            value=0.
+            value=0.8,
         ),
         gr.Slider(
             label="Top-k",

@@ -950,13 +1026,10 @@ chat_interface = gr.ChatInterface(
             minimum=1.0,
             maximum=2.0,
             step=0.05,
-            value=1.
+            value=1.0,
         ),
     ],
     stop_btn=True,
-    examples=[
-        ["In bullet-points, give me the classes from that Turtle ontology :"]
-    ],
     cache_examples=False,
     show_progress="full",
     run_examples_on_click=False
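For context, a minimal sketch of the two-pass chain-of-thought flow this commit adds, with the model calls stubbed out so it runs standalone. It only mirrors the message shapes used in app.py (the "🧠 Réflexion" metadata title and pending/done status, which Gradio can render as a collapsible thought bubble); the names fake_llm and generate_cot are illustrative and not part of the commit.

# Illustrative sketch only -- fake_llm and generate_cot are not in the commit.
from typing import Any, Dict, Iterator, List

def create_thinking_message(content: str, status: str = None) -> Dict[str, Any]:
    # Same message shape as in app.py: metadata title/status mark it as a "thought".
    return {"role": "assistant", "content": content,
            "metadata": {"title": "🧠 Réflexion", "status": status}}

def fake_llm(conversation: List[Dict[str, str]]) -> str:
    # Stand-in for generate_llm_response(), which streams from the real model.
    return f"(simulated reply to {len(conversation)} message(s))"

def generate_cot(message: str) -> Iterator[Dict[str, Any]]:
    # Pass 1: reasoning, surfaced as a metadata-tagged thinking message.
    yield create_thinking_message("Réflexion en cours...", status="pending")
    thinking = fake_llm([{"role": "user",
                          "content": message + "\n\nRéfléchis étape par étape."}])
    yield create_thinking_message(thinking, status="done")
    # Pass 2: final answer from a separate conversation, as a plain assistant message.
    yield {"role": "assistant", "content": fake_llm([{"role": "user", "content": message}])}

for update in generate_cot("Liste les classes de cette ontologie Turtle."):
    print(update.get("metadata", {}).get("title"), "->", update["content"])

In the actual generate() added by this commit, both passes go through generate_llm_response(), the thinking pass gets a doubled token budget, and the final pass reuses the chat history at temperature * 0.8.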