kheopss committed (verified)
Commit d2399db · Parent(s): 3c11d2d

Update app.py

Files changed (1): app.py (+38, -13)
app.py CHANGED
@@ -1,5 +1,6 @@
 import nest_asyncio
 import gradio as gr
+import tiktoken
 from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
 from llama_index.core.postprocessor import LLMRerank
 import logging
@@ -19,7 +20,7 @@ from llama_index.core.retrievers import VectorIndexRetriever
 from llama_index.core import QueryBundle
 import time
 from huggingface_hub import login
-
+from gradio import ChatMessage
 nest_asyncio.apply()
 hf_token = os.getenv('hf_token')
 
@@ -93,14 +94,19 @@ def get_all_text(new_nodes):
     for i, node in enumerate(new_nodes, 1):
         texts.append(f"\nDocument {i} : {node.get_text()}")
     return ' '.join(texts)
-def completion_to(text,user_p):
-    system_p = "You are a conversational AI assistant tasked with helping public agents in Nice guide residents and citizens to appropriate services. You will respond to user queries using information from provided documents. Your answer mode can be 'Grounded' or 'Mixed'. In 'Grounded' mode, use only exact facts from the documents, citing them with <co: doc_id></co> tags. In 'Mixed' mode, you can incorporate both document facts and your own knowledge. Always respond in French, keeping your answers grounded in the document text and engaging in conversation to assist based on user questions."
-    return f"<|im_start|>system{system_p}\n DOCUMENTS : \n {text}\n <|im_end|><|im_start|>user \n{user_p}\n<|im_end|><|im_start|>assistant"
 
+# Load the cl100k_base tokenizer
+encoding = tiktoken.get_encoding("cl100k_base")
+
+def estimate_tokens(text):
+    # Encode the text and count the resulting tokens
+    tokens = encoding.encode(text)
+    return len(tokens)
 
-def process_final(user_prom, history):
+def process_final(user_prom, history):
     import time
     all_process_start = time.time()
+    system_p = "You are a conversational AI assistant tasked with helping public agents in Nice guide residents and citizens to appropriate services. You will respond to user queries using information from provided documents. Your answer mode can be 'Grounded' or 'Mixed'. In 'Grounded' mode, use only exact facts from the documents, citing them with <co: doc_id></co> tags. In 'Mixed' mode, you can incorporate both document facts and your own knowledge. Always respond in French, keeping your answers grounded in the document text and engaging in conversation to assist based on user questions."
     new_nodes = get_retrieved_nodes(
         user_prom,
         vector_top_k=5,
@@ -108,18 +114,37 @@ def process_final(user_prom, history):
         with_reranker=True,
     )
     get_texts = get_all_text(new_nodes)
-    prompting = completion_to(get_texts,user_prom)
+
     print("PHASE 03 passing to LLM\n")
-    phase_03_start = time.time()
-    gen =llm.stream_complete(formatted=True, prompt=prompting)
-    # phase_03_end = time.time()
-    # all_process_end = time.time()
-    # print(f"Phase 03 (LLM) took {phase_03_end - phase_03_start} seconds")
-    # print(f"All process took {all_process_end - all_process_start} seconds")
-    # llm.stream_complete(formatted=True, prompt=prompting)
+    prompt_f = f"<|im_start|>system \n{system_p}\n DOCUMENTS {get_texts}\n<|im_end|>"
+    total_tokens = estimate_tokens(prompt_f)
 
+    # Add history, starting with the most recent turns
+    for val in reversed(history):
+        user_text = f" <|im_start|>user \n {val[0]}\n<|im_end|>" if val[0] else ""
+        assistant_text = f" <|im_start|>assistant \n {val[1]}\n<|im_end|>" if val[1] else ""
+
+        current_tokens = estimate_tokens(user_text + assistant_text)
+
+
+        # Check whether adding this turn would exceed the limit
+        if total_tokens + current_tokens > 3000:
+            break  # stop adding history once the limit would be exceeded
+        else:
+            # Append to prompt_f and update the running token total
+            prompt_f += user_text + assistant_text
+            total_tokens += current_tokens
+            print(f"running TOTAL token count: {total_tokens}\n")
+    prompt_f += f" <|im_start|>user \n{user_prom} \n<|im_end|><|im_start|>assistant \n"
+
+    phase_03_start = time.time()
+    gen = llm.stream_complete(formatted=True, prompt=prompt_f)
+    print("_" * 100)
+    print(prompt_f)
+    print("o" * 100)
     for response in gen:
         yield response.text
+
 description = """
 <p>
 <center>
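
What the change does: the one-shot completion_to() prompt builder is replaced by an inline ChatML prompt whose chat history is trimmed to a token budget. The system prompt and retrieved documents are counted first with tiktoken, then past (user, assistant) turns are appended newest-first until the hardcoded 3000-token cap would be exceeded. A minimal standalone sketch of that strategy, assuming Gradio's tuple-style history; build_budgeted_prompt and MAX_PROMPT_TOKENS are illustrative names, not from the commit:

```python
import tiktoken

# cl100k_base is the encoding the commit loads; its counts are an estimate
# for the deployed chat model, not an exact match.
encoding = tiktoken.get_encoding("cl100k_base")

MAX_PROMPT_TOKENS = 3000  # the commit hardcodes this limit inline


def estimate_tokens(text: str) -> int:
    return len(encoding.encode(text))


def build_budgeted_prompt(base_prompt: str, history: list[tuple[str, str]]) -> str:
    """Append past (user, assistant) turns newest-first until the budget is hit."""
    prompt = base_prompt
    total = estimate_tokens(prompt)
    for user_msg, assistant_msg in reversed(history):
        user_text = f"<|im_start|>user\n{user_msg}\n<|im_end|>" if user_msg else ""
        assistant_text = (
            f"<|im_start|>assistant\n{assistant_msg}\n<|im_end|>" if assistant_msg else ""
        )
        turn_tokens = estimate_tokens(user_text + assistant_text)
        if total + turn_tokens > MAX_PROMPT_TOKENS:
            break  # older turns are dropped wholesale
        prompt += user_text + assistant_text
        total += turn_tokens
    return prompt
```

One side effect worth knowing: because turns are appended while iterating newest-first, the kept history ends up in reverse chronological order inside the prompt string, and the commit's loop has the same property. If chronological order matters to the model, collect the kept turns in a list and reverse it before concatenating.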
 
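After trimming, process_final() closes the prompt with the current user turn plus an open assistant tag and streams the completion. A sketch of that final step, reusing the commit's own stream_complete call shape (stream_answer is an illustrative name; llm stands for the LlamaIndex LLM the app configures elsewhere in app.py):

```python
def stream_answer(llm, trimmed_prompt: str, user_prompt: str):
    # Close the prompt with the new user turn and an open assistant tag,
    # exactly as process_final() does.
    prompt = (
        trimmed_prompt
        + f"<|im_start|>user\n{user_prompt}\n<|im_end|><|im_start|>assistant\n"
    )
    # formatted=True tells LlamaIndex the prompt is already fully formatted,
    # so no extra prompt template is applied before completion.
    for response in llm.stream_complete(formatted=True, prompt=prompt):
        yield response.text
```

With most LlamaIndex LLMs, each streamed response.text is the cumulative text so far rather than a delta, which suits Gradio's replace-on-yield chat streaming.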
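The hunks call get_retrieved_nodes(), which is defined outside this diff. For orientation only: given the imports app.py already carries (VectorIndexRetriever, LLMRerank, QueryBundle), a retrieve-then-rerank helper conventionally looks like the sketch below. Treat it as an assumption about the surrounding file, with index standing for the app's VectorStoreIndex:

```python
from llama_index.core import QueryBundle
from llama_index.core.postprocessor import LLMRerank
from llama_index.core.retrievers import VectorIndexRetriever


def get_retrieved_nodes(query_str, vector_top_k=5, reranker_top_n=3, with_reranker=True):
    # `index` is assumed to be the VectorStoreIndex built elsewhere in app.py.
    query_bundle = QueryBundle(query_str)
    retriever = VectorIndexRetriever(index=index, similarity_top_k=vector_top_k)
    retrieved_nodes = retriever.retrieve(query_bundle)
    if with_reranker:
        # Let an LLM rescore the vector hits and keep only the best few.
        reranker = LLMRerank(choice_batch_size=5, top_n=reranker_top_n)
        retrieved_nodes = reranker.postprocess_nodes(retrieved_nodes, query_bundle)
    return retrieved_nodes
```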