Update app.py
app.py
CHANGED
@@ -1,5 +1,6 @@
 import nest_asyncio
 import gradio as gr
+import tiktoken
 from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
 from llama_index.core.postprocessor import LLMRerank
 import logging
@@ -19,7 +20,7 @@ from llama_index.core.retrievers import VectorIndexRetriever
 from llama_index.core import QueryBundle
 import time
 from huggingface_hub import login
-
+from gradio import ChatMessage
 nest_asyncio.apply()
 hf_token = os.getenv('hf_token')
 
@@ -93,14 +94,19 @@ def get_all_text(new_nodes):
     for i, node in enumerate(new_nodes, 1):
         texts.append(f"\nDocument {i} : {node.get_text()}")
     return ' '.join(texts)
-def completion_to(text,user_p):
-    system_p = "You are a conversational AI assistant tasked with helping public agents in Nice guide residents and citizens to appropriate services. You will respond to user queries using information from provided documents. Your answer mode can be 'Grounded' or 'Mixed'. In 'Grounded' mode, use only exact facts from the documents, citing them with <co: doc_id></co> tags. In 'Mixed' mode, you can incorporate both document facts and your own knowledge. Always respond in French, keeping your answers grounded in the document text and engaging in conversation to assist based on user questions."
-    return f"<|im_start|>system{system_p}\n DOCUMENTS : \n {text}\n <|im_end|><|im_start|>user \n{user_p}\n<|im_end|><|im_start|>assistant"
 
+# Load the cl100k_base tokenizer
+encoding = tiktoken.get_encoding("cl100k_base")
+
+def estimate_tokens(text):
+    # Encode the text to obtain its tokens
+    tokens = encoding.encode(text)
+    return len(tokens)
 
-def process_final(user_prom, history):
+def process_final(user_prom,history):
     import time
     all_process_start = time.time()
+    system_p = "You are a conversational AI assistant tasked with helping public agents in Nice guide residents and citizens to appropriate services. You will respond to user queries using information from provided documents. Your answer mode can be 'Grounded' or 'Mixed'. In 'Grounded' mode, use only exact facts from the documents, citing them with <co: doc_id></co> tags. In 'Mixed' mode, you can incorporate both document facts and your own knowledge. Always respond in French, keeping your answers grounded in the document text and engaging in conversation to assist based on user questions."
     new_nodes = get_retrieved_nodes(
         user_prom,
         vector_top_k=5,
@@ -108,18 +114,37 @@ def process_final(user_prom, history):
         with_reranker=True,
     )
     get_texts = get_all_text(new_nodes)
-
+
     print("PHASE 03 passing to LLM\n")
-
-
-    # phase_03_end = time.time()
-    # all_process_end = time.time()
-    # print(f"Phase 03 (LLM) took {phase_03_end - phase_03_start} seconds")
-    # print(f"All process took {all_process_end - all_process_start} seconds")
-    # llm.stream_complete(formatted=True, prompt=prompting)
+    prompt_f = f"<|im_start|>system \n{system_p}\n DOCUMENTS {get_texts}\n<|im_end|>"
+    total_tokens = estimate_tokens(prompt_f)
 
+    # Add the chat history, starting with the most recent turns
+    for val in reversed(history):
+        user_text = f" <|im_start|>user \n {val[0]}\n<|im_end|>" if val[0] else ""
+        assistant_text = f" <|im_start|>assistant \n {val[1]}\n<|im_end|>" if val[1] else ""
+
+        current_tokens = estimate_tokens(user_text + assistant_text)
+
+
+        # Check whether adding this history entry would exceed the limit
+        if total_tokens + current_tokens > 3000:
+            break  # Stop adding once the limit would be exceeded
+        else:
+            # Append to `prompt_f` and update the total token count
+            prompt_f += user_text + assistant_text
+            total_tokens += current_tokens
+    print(f"le nombre TOTAL de token : {current_tokens}\n")
+    prompt_f += f" <|im_start|>user \n{user_prom} \n<|im_end|><|im_start|>assistant \n"
+
+    phase_03_start = time.time()
+    gen = llm.stream_complete(formatted=True, prompt=prompt_f)
+    print("_"*100)
+    print(prompt_f)
+    print("o"*100)
     for response in gen:
         yield response.text
+
 description = """
 <p>
 <center>
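
The substance of this commit is the token-budgeted prompt assembly added to process_final: tiktoken's cl100k_base encoding is loaded once, the system block plus retrieved documents are measured, and the chat history is then walked from the most recent turn backwards, appending turns only while the running total stays under 3000 tokens, before the prompt is closed with the current user message and an open assistant turn. Below is a minimal, standalone sketch of that pattern under stated assumptions: history is a list of (user, assistant) pairs as in Gradio's tuple-style chat history, build_prompt is a hypothetical helper name (the Space inlines this logic in process_final), the retrieval step and llm.stream_complete call are left out, and cl100k_base only approximates the deployed model's tokenizer, so the count is an estimate.

import tiktoken

# cl100k_base approximates, but need not exactly match, the deployed model's tokenizer.
encoding = tiktoken.get_encoding("cl100k_base")

def estimate_tokens(text: str) -> int:
    # Count tokens by encoding the text.
    return len(encoding.encode(text))

def build_prompt(user_prom, history, system_p, documents, budget=3000):
    # Hypothetical helper mirroring the logic the commit adds to process_final.
    prompt_f = f"<|im_start|>system \n{system_p}\n DOCUMENTS {documents}\n<|im_end|>"
    total_tokens = estimate_tokens(prompt_f)

    # Walk the history newest-first and keep turns while they fit the budget.
    for user_msg, assistant_msg in reversed(history):
        user_text = f" <|im_start|>user \n {user_msg}\n<|im_end|>" if user_msg else ""
        assistant_text = f" <|im_start|>assistant \n {assistant_msg}\n<|im_end|>" if assistant_msg else ""
        current_tokens = estimate_tokens(user_text + assistant_text)
        if total_tokens + current_tokens > budget:
            break  # older turns are dropped once the budget would be exceeded
        prompt_f += user_text + assistant_text
        total_tokens += current_tokens

    # Close with the current user message and an open assistant turn for generation.
    prompt_f += f" <|im_start|>user \n{user_prom} \n<|im_end|><|im_start|>assistant \n"
    return prompt_f

if __name__ == "__main__":
    hist = [("Bonjour", "Bonjour, comment puis-je vous aider ?")]
    print(build_prompt("Où renouveler ma carte d'identité ?", hist,
                       system_p="You are a helpful assistant.",
                       documents="Document 1 : ..."))

One design consequence worth noting: because kept turns are appended while iterating newest-first, the history ends up in reverse chronological order inside the prompt, exactly as in the diff; collecting the kept turns in a list and reversing it before concatenation would restore chronological order if the model is sensitive to it.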