%%capture !pip install -U bitsandbytes !pip install -U peft !pip install -U accelerate !pip install -U trl !pip install datasets==2.16.0 !pip install sentencepiece !pip install -q -U git+https://github.com/huggingface/transformers.git !pip install "torch>=2.1.1" -U !pip install accelerate !pip install -q python-dotenv==1.0.0 !pip install -q pandas==1.5.3 !pip install -q unstructured==0.7.12 !pip install -q wikipedia==1.4.0 !pip install -q pypdf==3.12.0 !pip install -q jq==1.4.1 !pip install -q nltk==3.8.1 !pip install -q tiktoken==0.4.0 !pip install -q sentencepiece==0.1.99 !pip install -q sentence-transformers==2.2.2 !pip install -q cohere==4.11.2 !pip install -q faiss-cpu==1.7.4 !pip install -q chromadb==0.3.26 !pip install transformers !pip install langchain !pip install langchain-community from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model import os,torch from datasets import load_dataset from trl import SFTTrainer import pandas as pd import pyarrow as pa import pyarrow.dataset as ds from datasets import Dataset import re import pandas as pd import os from langchain.vectorstores import FAISS from sklearn.metrics.pairwise import cosine_similarity def load_data(text_filename='docs_text.json', embeddings_filename='docs_embeddings.json'): import json import pickle with open(text_filename, 'r', encoding='utf-8') as f: docs_text = json.load(f) with open(embeddings_filename, 'r') as f: docs_embeddings = json.load(f) return docs_text, docs_embeddings #docs_text, docs_embeddings = load_data() def mot_cle(path) with open(path, 'r') as fichier: contenu = fichier.read() # Séparer les mots en utilisant la virgule comme séparateur mots = contenu.split(',') # Afficher les mots pour vérifier for mot in mots: print(mot.strip()) # stocker les mots dans un tableau (une liste) tableau_de_mots = [mot.strip() for mot in mots] return tableau_de_mots def vector(path) loaded_vector_db = FAISS.load_local(path, embedding_llm, allow_dangerous_deserialization=True) return loaded_vector_db def pip(question) query_text = question query_embedding = embedding_llm.embed_query(query_text) query_embedding_array = np.array(query_embedding) docs_embeddings=np.array(docs_embeddings) # Question à analyser question = query_text # Convertir la question en une liste de mots mots_question = question.lower().split() bi_grammes = [' '.join([mots_question[i], mots_question[i+1]]) for i in range(len(mots_question)-1)] #mots_a_verifier_lower=[mot.lower() for mot in mots_a_verifier] mots_a_verifier_lower = {mot.lower(): mot for mot in mots_a_verifier} mots_question_lower=[mot.lower() for mot in mots_question] bi_grammes_lower=[mot.lower() for mot in bi_grammes] # Trouver les mots de la question qui sont dans le tableau mots_trouves1 = [mots_a_verifier_lower[mot] for mot in mots_a_verifier_lower if mot in bi_grammes_lower] if not mots_trouves1: mots_trouves1 = [mots_a_verifier_lower[mot] for mot in mots_a_verifier_lower if mot in mots_question_lower ] # Afficher les mots trouvés mots_trouves=mots_trouves1 if not mots_trouves: similarities = [cosine_similarity(doc.reshape(1,-1), query_embedding_array.reshape(1,-1)) for doc in docs_embeddings] sorted_docs = sorted(zip(docs_text, docs_embeddings, similarities), key=lambda x: x[2], reverse=True) similar_docs1 = [(doc,sim) for doc, _, sim in sorted_docs if sim > 0.72] if not similar_docs1: similar_docs2 = [(doc,sim) for doc, _, sim in sorted_docs if sim > 0.65] if not similar_docs2: similar_docs = [(doc,sim) for doc, _, sim in sorted_docs if sim > 0.4] if not similar_docs: similar_docsA = [(doc,sim) for doc, _, sim in sorted_docs if (sim >= 0.3 and sim<0.4)] if not similar_docsA: print("As a chatbot for Djezzy, I can provide information exclusively about our affiliated companies. Unfortunately, I'm unable to respond to inquiries outside of that scope.") generate2="As a chatbot for Djezzy, I can provide information exclusively about our affiliated companies. Unfortunately, I'm unable to respond to inquiries outside of that scope." generates.append(generate2) else: print("I apologize, I don't fully understand your question. You can contact our customer service for answers to your needs, or if you can provide more details, I would be happy to help.") generate1="I apologize, I don't fully understand your question. You can contact our customer service for answers to your needs, or if you can provide more details, I would be happy to help." generates.append(generate1) else: context="\n---------------------\n".join([doc for doc,_ in similar_docs[:4]]if len(similar_docs) >=3 else [doc for doc, _ in similar_docs[:1]]) system_message=" " prompt = f"[INST] <>\n As Djezzy's chatbot\nread each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n ###context:{context}<>\n\n ###question: {query_text} [/INST]" #prompt = f" user \n read each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n###context:\n{contexts[0]}\n ###question:\nWhat are the benefits of opting for the Djezzy Legend 100 DA package? \n###answer:\n{reponses[0]}\nuser \n read each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n###context:\n{contexts[1]}\n ###question:\nWhat are the benefits of opting for the Djezzy Legend 100 DA package? \n###answer:\n{reponses[1]}\nuser read each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n###context:\n{context}\n###question:\n{query_text}\n###answer:\n\n model" # replace the command here with something relevant to your task #pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer,temperature=0.1,top_p=0.9, max_length=4000) #result = pipe(prompt) #repons=result[0]['generated_text'].split('[/INST]')[1].strip() #generate=repons.replace("model", "") #generates.append(generate) #print(generate) #print(result[0]['generated_text']) else: context = "\n---------------------\n".join([doc for doc, _ in similar_docs2[:2]] if len(similar_docs2) >= 2 else [doc for doc, _ in similar_docs2[:1]]) system_message=" " prompt = f"[INST] <>\n As Djezzy's chatbot\nread each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n ###context:{context}<>\n\n ###question: {query_text} [/INST]" #prompt = f" user \n read each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n###context:\n{contexts[0]}\n ###question:\nWhat are the benefits of opting for the Djezzy Legend 100 DA package? \n###answer:\n{reponses[0]}\nuser \n read each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n###context:\n{contexts[1]}\n ###question:\nWhat are the benefits of opting for the Djezzy Legend 100 DA package? \n###answer:\n{reponses[1]}\nuser read each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n###context:\n{context}\n###question:\n{query_text}\n###answer:\n\n model" # replace the command here with something relevant to your task #pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer,temperature=0.1,top_p=0.9, max_length=4000) #result = pipe(prompt) #repons=result[0]['generated_text'].split('[/INST]')[1].strip() #generate=repons.replace("model", "") #generates.append(generate) #print(generate) #print(result[0]['generated_text']) else: context="\n---------------------\n".join([doc for doc,_ in similar_docs1[:1]]) system_message=" " prompt = f"[INST] <>\n As Djezzy's chatbot\nread 3 times each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n differentiates between each price and gives the correct answer and does not distinguish between the offers of each price\n ###context:{context}<>\n\n {query_text}[/INST]" #prompt = f" user \n read each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n###context:\n{contexts[0]}\n ###question:\nWhat are the benefits of opting for the Djezzy Legend 100 DA package? \n###answer:\n{reponses[0]}\nuser \n read each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n###context:\n{contexts[1]}\n ###question:\nWhat are the benefits of opting for the Djezzy Legend 100 DA package? \n###answer:\n{reponses[1]}\nuser read each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n###context:\n{context}\n###question:\n{query_text}\n###answer:\n\n model" # replace the command here with something relevant to your task #pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer,temperature=0.1,top_p=0.9, max_length=4000) #result = pipe(prompt) #repons=result[0]['generated_text'].split('[/INST]')[1].strip() #generate=repons.replace("model", "") #generates.append(generate) #print(generate) #print(result[0]['generated_text']) else: i=0 similar_docs=[] for i in range(len(mots_trouves)): k=mots_trouves[i] result=vector_db.similarity_search( query_text, k=1, filter={'document':mots_trouves[i] } ) similar_docs.append(result[0]) context="\n---------------------\n".join([similar_docs[i].page_content for i in range(len(similar_docs))]) system_message=" " prompt = f"[INST] <>\n As Djezzy's chatbot\nread each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n ###context:{context}<>\n\n ###question: {query_text} [/INST]" #prompt = f" user \n read each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n###context:\n{contexts[0]}\n ###question:\nWhat are the benefits of opting for the Djezzy Legend 100 DA package? \n###answer:\n{reponses[0]}\nuser \n read each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n###context:\n{contexts[1]}\n ###question:\nWhat are the benefits of opting for the Djezzy Legend 100 DA package? \n###answer:\n{reponses[1]}\nuser read each paraphrase in the context and Answer the question .\ndo not take into consideration the paragraphs which have no relation to the question\n if there is not a paragraph that is related to the question, respond that for this question it's best to reach out to our customer service team . They'll be able to assist you with your needs\n just give me the answer I don't want any other details \n###context:\n{context}\n###question:\n{query_text}\n###answer:\n\n model" # replace the command here with something relevant to your task #pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer,temperature=0.1,top_p=0.9, max_length=4000) #result = pipe(prompt) #repons=result[0]['generated_text'].split('[/INST]')[1].strip() #generate=repons.replace("model", "") #generates.append(generate) #print(generate) #print(result[0]['generated_text']) return prompt