Update djezzy.py
Browse files
djezzy.py
CHANGED
@@ -37,22 +37,146 @@ from datasets import Dataset
|
|
37 |
import re
|
38 |
import pandas as pd
|
39 |
import os
|
|
|
|
|
|
|
|
|
|
|
40 |
|
41 |
-
with open('/content/mots_clés.txt', 'r') as fichier:
|
42 |
-
contenu = fichier.read()
|
43 |
|
44 |
-
|
45 |
-
|
46 |
|
47 |
-
# Afficher les mots pour vérifier
|
48 |
-
for mot in mots:
|
49 |
-
print(mot.strip())
|
50 |
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
from langchain.embeddings import SentenceTransformerEmbeddings
|
55 |
|
56 |
model_name = "sentence-transformers/all-MiniLM-L6-v2"
|
57 |
|
58 |
embedding_llm = SentenceTransformerEmbeddings(model_name=model_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
import re
|
38 |
import pandas as pd
|
39 |
import os
|
40 |
+
from langchain.vectorstores import FAISS
|
41 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
42 |
+
def load_data(text_filename='docs_text.json', embeddings_filename='docs_embeddings.json'):
    """Load the document texts and their embeddings from two JSON files.

    Args:
        text_filename: Path of the JSON file holding the document texts.
        embeddings_filename: Path of the JSON file holding the embeddings.

    Returns:
        A ``(docs_text, docs_embeddings)`` tuple containing whatever each
        JSON file decodes to.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If either file is not valid JSON.
    """
    import json

    with open(text_filename, 'r', encoding='utf-8') as f:
        docs_text = json.load(f)

    # Explicit UTF-8 so both files are decoded the same way on every platform.
    with open(embeddings_filename, 'r', encoding='utf-8') as f:
        docs_embeddings = json.load(f)

    return docs_text, docs_embeddings
|
55 |
+
#docs_text, docs_embeddings = load_data()
|
56 |
+
def mot_cle(path):
    """Read a comma-separated keyword file and return the keywords as a list.

    Each keyword is stripped of surrounding whitespace and printed on its
    own line so the loaded list can be checked visually.

    Args:
        path: Path of the text file containing keywords separated by commas.

    Returns:
        The list of stripped keywords, in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Keywords may contain accented characters, so decode explicitly as UTF-8
    # instead of relying on the platform default encoding.
    with open(path, 'r', encoding='utf-8') as fichier:
        contenu = fichier.read()

    # The comma is the keyword separator; strip once and reuse the result.
    tableau_de_mots = [mot.strip() for mot in contenu.split(',')]

    # Print the keywords for verification.
    for mot in tableau_de_mots:
        print(mot)

    return tableau_de_mots
|
70 |
|
71 |
from langchain.embeddings import SentenceTransformerEmbeddings
|
72 |
|
73 |
# Name of the sentence-transformers model handed to the embedding wrapper below.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
|
74 |
|
75 |
# Module-level embedding object: vector() passes it to FAISS.load_local and
# pip() calls its embed_query() on the user question.
embedding_llm = SentenceTransformerEmbeddings(model_name=model_name)
|
76 |
+
def vector(path):
    """Load a FAISS vector store previously saved at *path*.

    The store is loaded with the module-level ``embedding_llm`` so that
    queries are embedded with the same model object.

    Args:
        path: Folder path passed to ``FAISS.load_local``.

    Returns:
        The loaded FAISS vector store.
    """
    # SECURITY: allow_dangerous_deserialization=True lets FAISS unpickle the
    # stored index metadata. Unpickling can execute arbitrary code, so only
    # call this on index folders produced by a trusted source.
    return FAISS.load_local(path, embedding_llm, allow_dangerous_deserialization=True)
|
79 |
+
# Separator placed between retrieved passages inside the prompt context.
_CONTEXT_SEPARATOR = "\n---------------------\n"

# Cosine-similarity tiers used when no keyword matches the question.
_SIM_STRONG = 0.72
_SIM_GOOD = 0.65
_SIM_FAIR = 0.4
_SIM_WEAK = 0.3

_OUT_OF_SCOPE_REPLY = "As a chatbot for Djezzy, I can provide information exclusively about our affiliated companies. Unfortunately, I'm unable to respond to inquiries outside of that scope."
_UNCLEAR_REPLY = "I apologize, I don't fully understand your question. You can contact our customer service for answers to your needs, or if you can provide more details, I would be happy to help."

# Instruction lines shared by both prompt templates.
_SHARED_INSTRUCTIONS = (
    "do not take into consideration the paragraphs which have no relation to the question\n"
    " if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n"
    " just give me the answer I don't want any other details \n"
)


def _standard_prompt(context, query_text):
    """Build the default [INST]-style prompt around *context* and the question."""
    return (
        "[INST] <<SYS>>\n As Djezzy's chatbot\n"
        "read each paraphrase in the context and Answer the question .\n"
        + _SHARED_INSTRUCTIONS
        + f" ###context:{context}<</SYS>>\n\n ###question: {query_text} [/INST]"
    )


def _price_aware_prompt(context, query_text):
    """Build the prompt variant used for the strongest-similarity tier."""
    return (
        "[INST] <<SYS>>\n As Djezzy's chatbot\n"
        "read 3 times each paraphrase in the context and Answer the question .\n"
        + _SHARED_INSTRUCTIONS
        + " differentiates between each price and gives the correct answer and does not distinguish between the offers of each price\n"
        + f" ###context:{context}<</SYS>>\n\n {query_text}[/INST]"
    )


def _match_keywords(question, keywords):
    """Return the keywords found in *question*, preferring two-word matches.

    Matching is case-insensitive. Keywords equal to a bigram of the question
    are returned if there are any; otherwise keywords equal to a single word
    of the question are returned. Results keep the order of *keywords* and
    their original spelling.
    """
    tokens = question.lower().split()
    bigrams = {" ".join(pair) for pair in zip(tokens, tokens[1:])}
    by_lower = {keyword.lower(): keyword for keyword in keywords}

    found = [original for lowered, original in by_lower.items() if lowered in bigrams]
    if not found:
        words = set(tokens)
        found = [original for lowered, original in by_lower.items() if lowered in words]
    return found


def pip(question):
    """Build the LLM prompt for *question* from retrieved Djezzy documents.

    This function only assembles the prompt; it does not run the model.

    Retrieval strategy:

    1. If the question contains known keywords (``mots_a_verifier``), the
       best passage per keyword is fetched from ``vector_db`` using a
       metadata filter on ``'document'``.
    2. Otherwise every document embedding is compared to the question by
       cosine similarity and the context is chosen by similarity tier
       (> 0.72, > 0.65, > 0.4). Below that, a canned reply is printed and
       appended to ``generates`` instead of building a prompt.

    Relies on module-level names that are not defined in the visible part
    of the file and must exist before calling: ``embedding_llm``,
    ``mots_a_verifier``, ``vector_db``, ``docs_text``, ``docs_embeddings``,
    ``generates`` and ``cosine_similarity``.

    Args:
        question: The user question as plain text.

    Returns:
        The prompt string, or ``None`` when no document was similar enough
        and a canned reply was appended to ``generates`` instead.
    """
    import numpy as np

    matched = _match_keywords(question, mots_a_verifier)

    if matched:
        passages = []
        for keyword in matched:
            found = vector_db.similarity_search(
                question,
                k=1,
                filter={'document': keyword},
            )
            # A filtered search may return nothing; skip instead of failing
            # with an IndexError on found[0].
            if found:
                passages.append(found[0])
        context = _CONTEXT_SEPARATOR.join(doc.page_content for doc in passages)
        return _standard_prompt(context, question)

    # No keyword hit: rank every document by cosine similarity to the query.
    # A distinct local name is used on purpose: assigning to `docs_embeddings`
    # here would make it function-local and raise UnboundLocalError.
    doc_vectors = np.asarray(docs_embeddings)
    if doc_vectors.size:
        query_vector = np.asarray(embedding_llm.embed_query(question)).reshape(1, -1)
        scores = cosine_similarity(doc_vectors, query_vector).ravel()
    else:
        scores = []
    ranked = sorted(zip(docs_text, scores), key=lambda pair: pair[1], reverse=True)

    strong = [text for text, score in ranked if score > _SIM_STRONG]
    if strong:
        # Very confident match: use only the single best passage.
        return _price_aware_prompt(_CONTEXT_SEPARATOR.join(strong[:1]), question)

    good = [text for text, score in ranked if score > _SIM_GOOD]
    if good:
        return _standard_prompt(_CONTEXT_SEPARATOR.join(good[:2]), question)

    fair = [text for text, score in ranked if score > _SIM_FAIR]
    if fair:
        # With three or more candidates keep up to four; with fewer keep
        # only the best one.
        chosen = fair[:4] if len(fair) >= 3 else fair[:1]
        return _standard_prompt(_CONTEXT_SEPARATOR.join(chosen), question)

    # Nothing usable: answer with a canned reply instead of a prompt.
    has_weak_match = any(_SIM_WEAK <= score < _SIM_FAIR for _, score in ranked)
    reply = _UNCLEAR_REPLY if has_weak_match else _OUT_OF_SCOPE_REPLY
    print(reply)
    generates.append(reply)
    return None
|