tarekfer8 committed on
Commit
1281b5f
·
verified ·
1 Parent(s): 73b7aee

Update djezzy.py

Browse files
Files changed (1) hide show
  1. djezzy.py +133 -9
djezzy.py CHANGED
@@ -37,22 +37,146 @@ from datasets import Dataset
37
  import re
38
  import pandas as pd
39
  import os
 
 
 
 
 
40
 
41
- with open('/content/mots_clés.txt', 'r') as fichier:
42
- contenu = fichier.read()
43
 
44
- # Séparer les mots en utilisant la virgule comme séparateur
45
- mots = contenu.split(',')
46
 
47
- # Afficher les mots pour vérifier
48
- for mot in mots:
49
- print(mot.strip())
50
 
51
- # stocker les mots dans un tableau (une liste)
52
- tableau_de_mots = [mot.strip() for mot in mots]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  from langchain.embeddings import SentenceTransformerEmbeddings
55
 
56
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
57
 
58
  embedding_llm = SentenceTransformerEmbeddings(model_name=model_name)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  import re
38
  import pandas as pd
39
  import os
40
+ from langchain.vectorstores import FAISS
41
+ from sklearn.metrics.pairwise import cosine_similarity
42
def load_data(text_filename='docs_text.json', embeddings_filename='docs_embeddings.json'):
    """Load the document texts and their embeddings from two JSON files.

    Args:
        text_filename: Path of the JSON file holding the document texts.
        embeddings_filename: Path of the JSON file holding the embeddings.

    Returns:
        A ``(docs_text, docs_embeddings)`` tuple containing whatever each
        file decodes to with ``json.load``.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If either file is not valid JSON.
    """
    # Imported locally, as in the original; the unused ``pickle`` import is dropped.
    import json

    with open(text_filename, 'r', encoding='utf-8') as f:
        docs_text = json.load(f)

    # Same explicit encoding as the text file so decoding does not depend on
    # the platform default.
    with open(embeddings_filename, 'r', encoding='utf-8') as f:
        docs_embeddings = json.load(f)

    return docs_text, docs_embeddings
55
+ #docs_text, docs_embeddings = load_data()
56
def mot_cle(path):
    """Read a comma-separated keyword file and return its keywords.

    Each keyword is stripped of surrounding whitespace and printed (kept
    from the original as a visual check). Empty entries, such as those
    produced by a trailing or doubled comma, are discarded.

    Args:
        path: Path of the text file containing the keywords.

    Returns:
        The list of non-empty, stripped keywords in file order.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit UTF-8 so accented keywords decode the same on every platform.
    with open(path, 'r', encoding='utf-8') as fichier:
        contenu = fichier.read()

    # Split on commas, trim each entry, and drop the empty ones.
    tableau_de_mots = [mot.strip() for mot in contenu.split(',')]
    tableau_de_mots = [mot for mot in tableau_de_mots if mot]

    # Print the keywords so they can be checked by eye.
    for mot in tableau_de_mots:
        print(mot)

    return tableau_de_mots
70
 
71
  from langchain.embeddings import SentenceTransformerEmbeddings
72
 
73
  # Name of the Sentence-Transformers model handed to the embedding wrapper below.
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
74
 
75
  # Shared embedding object: vector() passes it to FAISS.load_local and pip()
  # calls its embed_query() on the user question.
  embedding_llm = SentenceTransformerEmbeddings(model_name=model_name)
76
def vector(path):
    """Load a FAISS vector store previously saved under *path*.

    The store is loaded with the module-level ``embedding_llm`` as its
    embedding function.

    Args:
        path: Location passed to ``FAISS.load_local``.

    Returns:
        The loaded FAISS vector store.
    """
    # SECURITY: allow_dangerous_deserialization=True lets load_local unpickle
    # the stored index data. Only call this on index files you created
    # yourself or otherwise trust; a tampered file can execute arbitrary code.
    loaded_vector_db = FAISS.load_local(path, embedding_llm, allow_dangerous_deserialization=True)
    return loaded_vector_db
79
# Separator placed between retrieved passages inside the prompt context.
_CONTEXT_SEPARATOR = "\n---------------------\n"


def _standard_prompt(context, query_text):
    """Return the default [INST]/<<SYS>> prompt wrapping *context* and the question."""
    return (
        "[INST] <<SYS>>\n As Djezzy's chatbot\n"
        "read each paraphrase in the context and Answer the question .\n"
        "do not take into consideration the paragraphs which have no relation to the question\n"
        " if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n"
        " just give me the answer I don't want any other details \n"
        f" ###context:{context}<</SYS>>\n\n ###question: {query_text} [/INST]"
    )


def _precise_prompt(context, query_text):
    """Return the prompt variant used when one passage matches very closely."""
    return (
        "[INST] <<SYS>>\n As Djezzy's chatbot\n"
        "read 3 times each paraphrase in the context and Answer the question .\n"
        "do not take into consideration the paragraphs which have no relation to the question\n"
        " if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n"
        " just give me the answer I don't want any other details \n"
        " differentiates between each price and gives the correct answer and does not distinguish between the offers of each price\n"
        f" ###context:{context}<</SYS>>\n\n {query_text}[/INST]"
    )


def _matched_keywords(question_text, keywords):
    """Return the keywords (original casing) that occur in the question.

    Two-word sequences of the question are tried first; single words are
    only consulted when no two-word sequence matches a keyword.
    """
    words = question_text.lower().split()
    bigrams = {' '.join(pair) for pair in zip(words, words[1:])}
    # Map lowercase form -> original spelling so matching is case-insensitive
    # while the returned value keeps the spelling used in the keyword list.
    by_lowercase = {keyword.lower(): keyword for keyword in keywords}

    matches = [original for lowered, original in by_lowercase.items() if lowered in bigrams]
    if not matches:
        unigrams = set(words)
        matches = [original for lowered, original in by_lowercase.items() if lowered in unigrams]
    return matches


def pip(question):
    """Build the LLM prompt for *question* from retrieved context.

    Retrieval strategy:

    1. If the question contains known keywords, the top passage for each
       keyword is fetched from ``vector_db`` (filtered on the ``document``
       metadata field) and the standard prompt is returned.
    2. Otherwise the question embedding is compared with every document
       embedding and the best-scoring passages are selected by similarity
       tier (> 0.72, > 0.65, > 0.4).
    3. If nothing scores above 0.4, a canned reply is printed and appended
       to ``generates`` and no prompt is produced.

    This function relies on module-level names that are not defined in the
    visible part of the file: ``np``, ``docs_text``, ``docs_embeddings``,
    ``mots_a_verifier``, ``vector_db`` and ``generates``.
    NOTE(review): presumably these come from ``load_data()``, ``mot_cle()``
    and ``vector()``; confirm they are initialised before the first call.

    Args:
        question: The user question as a string.

    Returns:
        The prompt string, or ``None`` when no sufficiently similar passage
        exists (the original code raised ``UnboundLocalError`` there).
    """
    query_text = question

    # --- Keyword route ---------------------------------------------------
    mots_trouves = _matched_keywords(query_text, mots_a_verifier)
    if mots_trouves:
        passages = []
        for keyword in mots_trouves:
            hits = vector_db.similarity_search(
                query_text,
                k=1,
                filter={'document': keyword},
            )
            # A keyword with no indexed passage is skipped instead of
            # crashing on hits[0].
            if hits:
                passages.append(hits[0].page_content)
        return _standard_prompt(_CONTEXT_SEPARATOR.join(passages), query_text)

    # --- Embedding-similarity route ---------------------------------------
    query_vector = np.array(embedding_llm.embed_query(query_text)).reshape(1, -1)
    # Use a distinct local name: rebinding ``docs_embeddings`` here would make
    # it local to the function and raise UnboundLocalError on this very line.
    doc_vectors = np.array(docs_embeddings)
    if len(doc_vectors):
        # One vectorised call instead of one call per document.
        scores = cosine_similarity(doc_vectors, query_vector).ravel()
    else:
        scores = []
    ranked = sorted(zip(docs_text, scores), key=lambda pair: pair[1], reverse=True)

    very_close = [doc for doc, score in ranked if score > 0.72]
    if very_close:
        return _precise_prompt(_CONTEXT_SEPARATOR.join(very_close[:1]), query_text)

    close = [doc for doc, score in ranked if score > 0.65]
    if close:
        return _standard_prompt(_CONTEXT_SEPARATOR.join(close[:2]), query_text)

    related = [doc for doc, score in ranked if score > 0.4]
    if related:
        # Kept from the original: up to four passages when at least three
        # qualify, otherwise only the single best one.
        chosen = related[:4] if len(related) >= 3 else related[:1]
        return _standard_prompt(_CONTEXT_SEPARATOR.join(chosen), query_text)

    # --- No usable context -------------------------------------------------
    # Every score is <= 0.4 here, so ``>= 0.3`` also covers a score of exactly
    # 0.4, which the original ``0.3 <= sim < 0.4`` test let fall through to the
    # out-of-scope reply.
    if any(score >= 0.3 for _, score in ranked):
        message = "I apologize, I don't fully understand your question. You can contact our customer service for answers to your needs, or if you can provide more details, I would be happy to help."
    else:
        message = "As a chatbot for Djezzy, I can provide information exclusively about our affiliated companies. Unfortunately, I'm unable to respond to inquiries outside of that scope."
    print(message)
    generates.append(message)

    # NOTE: the text-generation step that used to follow each prompt (a
    # transformers ``pipeline(task="text-generation", ..., temperature=0.1,
    # top_p=0.9, max_length=4000)`` call) was commented out in the original;
    # this function only builds the prompt.
    return None