Spaces:

tarekfer8
/

tarek

Sleeping

App Files Files Community

tarekfer8 committed on Jun 6, 2024

Commit

1281b5f

verified ·

1 Parent(s): 73b7aee

Update djezzy.py

Browse files

Files changed (1) hide show

djezzy.py +133 -9

djezzy.py CHANGED Viewed

@@ -37,22 +37,146 @@ from datasets import Dataset
 import re
 import pandas as pd
 import os
-# Read the keyword file and build a list of its comma-separated entries.
-with open('/content/mots_clés.txt', 'r') as fichier:
-    contenu = fichier.read()
-# Split the words using the comma as the separator
-mots = contenu.split(',')
-# Print the words to check them
-for mot in mots:
-    print(mot.strip())
-# Store the words in an array (a list)
-tableau_de_mots = [mot.strip() for mot in mots]
 from langchain.embeddings import SentenceTransformerEmbeddings
 # Embedding object built from the all-MiniLM-L6-v2 sentence-transformers checkpoint.
 model_name = "sentence-transformers/all-MiniLM-L6-v2"
 embedding_llm = SentenceTransformerEmbeddings(model_name=model_name)

 import re
 import pandas as pd
 import os
+from langchain.vectorstores import FAISS
+from sklearn.metrics.pairwise import cosine_similarity
+def load_data(text_filename='docs_text.json', embeddings_filename='docs_embeddings.json'):
+    """Load the document texts and their embeddings from two JSON files.
+
+    Args:
+        text_filename: Path of the JSON file holding the document texts.
+        embeddings_filename: Path of the JSON file holding the embeddings.
+
+    Returns:
+        A ``(docs_text, docs_embeddings)`` tuple with the decoded content of
+        each file, in that order.
+
+    Raises:
+        OSError: If either file cannot be opened.
+        json.JSONDecodeError: If either file is not valid JSON.
+    """
+    import json
+
+    # Both files are read as UTF-8 so the result does not depend on the
+    # platform's default encoding.
+    with open(text_filename, 'r', encoding='utf-8') as f:
+        docs_text = json.load(f)
+    with open(embeddings_filename, 'r', encoding='utf-8') as f:
+        docs_embeddings = json.load(f)
+    return docs_text, docs_embeddings
+#docs_text, docs_embeddings = load_data()
+def mot_cle(path):
+    """Read a comma-separated keyword file and return the keywords as a list.
+
+    Args:
+        path: Path of the text file containing keywords separated by commas.
+
+    Returns:
+        A list with one entry per comma-separated field, each stripped of
+        surrounding whitespace. Every entry is also printed, one per line.
+
+    Raises:
+        OSError: If the file cannot be opened.
+    """
+    # Explicit UTF-8 so accented keywords decode the same way on every platform.
+    with open(path, 'r', encoding='utf-8') as fichier:
+        contenu = fichier.read()
+    # Split on commas and drop the whitespace around each keyword.
+    tableau_de_mots = [mot.strip() for mot in contenu.split(',')]
+    # Echo the keywords so the loaded list can be checked by eye.
+    for mot in tableau_de_mots:
+        print(mot)
+    return tableau_de_mots
 from langchain.embeddings import SentenceTransformerEmbeddings
 # Embedding object built from the all-MiniLM-L6-v2 sentence-transformers checkpoint.
 model_name = "sentence-transformers/all-MiniLM-L6-v2"
 embedding_llm = SentenceTransformerEmbeddings(model_name=model_name)
+def vector(path):
+    """Load a FAISS vector store saved at *path*.
+
+    The store is loaded with the module-level ``embedding_llm`` object.
+
+    Args:
+        path: Location of the saved FAISS index, passed to ``FAISS.load_local``.
+
+    Returns:
+        The value returned by ``FAISS.load_local``.
+    """
+    # SECURITY: allow_dangerous_deserialization=True lets the loader unpickle
+    # data stored with the index; only point this at index files you trust.
+    loaded_vector_db = FAISS.load_local(path, embedding_llm, allow_dangerous_deserialization=True)
+    return loaded_vector_db
+def _prompt_standard(context, query_text):
+    """Return the standard [INST]/<<SYS>> prompt for *context* and *query_text*."""
+    return f"[INST] <<SYS>>\n As Djezzy's chatbot\nread each paraphrase in the context and Answer  the  question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n ###context:{context}<</SYS>>\n\n  ###question: {query_text} [/INST]"
+
+
+def pip(question):
+    """Build the LLM prompt for *question* from retrieved documents.
+
+    Retrieval runs in two stages:
+
+    1. Keyword match: the lower-cased question is split on whitespace and
+       its bi-grams, then its single words, are compared with the
+       lower-cased entries of ``mots_a_verifier``. For every matched keyword
+       the first hit of ``vector_db.similarity_search`` filtered on
+       ``{'document': keyword}`` is used as context.
+    2. Embedding fallback: when no keyword matches, the question embedding
+       is compared by cosine similarity with ``docs_embeddings`` and the
+       corresponding entries of ``docs_text`` are selected with decreasing
+       thresholds (0.72, 0.65, 0.4).
+
+    When no document scores above 0.4, no prompt is built: a canned reply is
+    printed and appended to ``generates`` instead. This function only builds
+    the prompt; it does not call a generation model.
+
+    NOTE(review): ``mots_a_verifier``, ``vector_db``, ``docs_text``,
+    ``docs_embeddings`` and ``generates`` are read as module-level names and
+    are not defined in the part of the file visible here -- confirm they are
+    set before this function is called.
+
+    Args:
+        question: The user question as a string.
+
+    Returns:
+        The prompt string, or ``None`` when a canned reply was emitted
+        because no sufficiently similar document was found.
+    """
+    import numpy as np
+
+    separator = "\n---------------------\n"
+    query_text = question
+
+    # --- Stage 1: keyword lookup (bi-grams first, then single words) -------
+    mots_question = query_text.lower().split()
+    bi_grammes = {' '.join(paire) for paire in zip(mots_question, mots_question[1:])}
+    # Maps the lower-cased keyword to its original spelling, which is the
+    # value used for the vector-store metadata filter.
+    mots_a_verifier_lower = {mot.lower(): mot for mot in mots_a_verifier}
+
+    mots_trouves = [orig for bas, orig in mots_a_verifier_lower.items() if bas in bi_grammes]
+    if not mots_trouves:
+        mots_simples = set(mots_question)
+        mots_trouves = [orig for bas, orig in mots_a_verifier_lower.items() if bas in mots_simples]
+
+    if mots_trouves:
+        similar_docs = []
+        for mot in mots_trouves:
+            result = vector_db.similarity_search(
+                query_text,
+                k=1,
+                filter={'document': mot},
+            )
+            # A filter may match nothing; skip it instead of indexing an
+            # empty result.
+            if result:
+                similar_docs.append(result[0])
+        context = separator.join(doc.page_content for doc in similar_docs)
+        return _prompt_standard(context, query_text)
+
+    # --- Stage 2: embedding similarity fallback ----------------------------
+    query_embedding_array = np.array(embedding_llm.embed_query(query_text))
+    # Distinct local name: rebinding ``docs_embeddings`` here would make it a
+    # local variable and hide the module-level data.
+    docs_matrix = np.array(docs_embeddings)
+    if docs_matrix.size:
+        similarities = cosine_similarity(docs_matrix, query_embedding_array.reshape(1, -1)).ravel()
+    else:
+        similarities = []
+    sorted_docs = sorted(zip(docs_text, similarities), key=lambda paire: paire[1], reverse=True)
+
+    similar_docs1 = [(doc, sim) for doc, sim in sorted_docs if sim > 0.72]
+    if similar_docs1:
+        # Very close match: only the single best document is used.
+        context = separator.join(doc for doc, _ in similar_docs1[:1])
+        return f"[INST] <<SYS>>\n As Djezzy's chatbot\nread 3 times  each paraphrase in the context and Answer  the  question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n differentiates between each price and gives the correct answer and does not distinguish between the offers of each price\n ###context:{context}<</SYS>>\n\n {query_text}[/INST]"
+
+    similar_docs2 = [(doc, sim) for doc, sim in sorted_docs if sim > 0.65]
+    if similar_docs2:
+        # Up to the two best documents.
+        context = separator.join(doc for doc, _ in similar_docs2[:2])
+        return f"[INST] <<SYS>>\n  As Djezzy's chatbot\nread each paraphrase in the context and Answer  the  question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n ###context:{context}<</SYS>>\n\n  ###question: {query_text} [/INST]"
+
+    similar_docs = [(doc, sim) for doc, sim in sorted_docs if sim > 0.4]
+    if similar_docs:
+        # Up to four documents when at least three qualify; with one or two
+        # qualifying documents only the best one is kept.
+        retenus = similar_docs[:4] if len(similar_docs) >= 3 else similar_docs[:1]
+        context = separator.join(doc for doc, _ in retenus)
+        return _prompt_standard(context, query_text)
+
+    # Nothing scored above 0.4: emit a canned reply instead of a prompt.
+    similar_docsA = [(doc, sim) for doc, sim in sorted_docs if (sim >= 0.3 and sim < 0.4)]
+    if similar_docsA:
+        reponse = "I apologize, I don't fully understand your question. You can contact our customer service for answers to your needs, or if you can provide more details, I would be happy to help."
+    else:
+        reponse = "As a chatbot for Djezzy, I can provide information exclusively about our affiliated companies. Unfortunately, I'm unable to respond to inquiries outside of that scope."
+    print(reponse)
+    generates.append(reponse)
+    return None