from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging

import os,torch
from datasets import load_dataset
from langdetect import detect
from langdetect import detect_langs
from langdetect import DetectorFactory
import pandas as pd
import pyarrow as pa
import pyarrow.dataset as ds
from datasets import Dataset
import re
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import FAISS
from sklearn.metrics.pairwise import cosine_similarity
import json
import pickle
import numpy as np
import shutil
import tempfile

# Files of the persisted FAISS vector store, resolved relative to the current
# working directory.
index_source='index.faiss'
hh_source='index.pkl'
# Sentence-embedding model name passed to SentenceTransformerEmbeddings below.
model_name = "sentence-transformers/all-MiniLM-L6-v2"


# Embedding function shared by the FAISS store and by query embedding in pip().
embedding_llm = SentenceTransformerEmbeddings(model_name=model_name)

from transformers import T5Tokenizer, T5ForConditionalGeneration

# FLAN-T5 tokenizer/model pair; pip() uses it to translate French questions
# into English before retrieval.
tokenizer1 = T5Tokenizer.from_pretrained("google/flan-t5-base")
model1 = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
# The index files are copied into a scratch directory and loaded from there.
# NOTE(review): the directory is deleted when the `with` block exits while
# vector_db stays in use afterwards — presumably load_local reads everything
# into memory; confirm.
with tempfile.TemporaryDirectory() as temp_dir:
    # Paths of the target files inside the temporary directory
    index_target = os.path.join(temp_dir, 'index.faiss')
    hh_target = os.path.join(temp_dir, 'index.pkl')
    
    # Copy the files into the temporary directory
    shutil.copy(index_source, index_target)
    shutil.copy(hh_source, hh_target)
    
    # Load the FAISS database from the temporary directory.
    # SECURITY: allow_dangerous_deserialization=True makes load_local unpickle
    # index.pkl; only ever load index files that come from a trusted source.
    vector_db = FAISS.load_local(temp_dir, embedding_llm, allow_dangerous_deserialization=True)


def load_data(text_filename='docs_text.json', embeddings_filename='docs_embeddings.json'):
    """Load the document texts and their embeddings from two JSON files.

    Args:
        text_filename: Path of the JSON file holding the document texts
            (read as UTF-8).
        embeddings_filename: Path of the JSON file holding the embeddings
            (read with the platform default encoding).

    Returns:
        A ``(docs_text, docs_embeddings)`` tuple with the parsed content of
        each file, in that order.
    """
    # The text file is fully read and closed before the embeddings file is
    # opened.
    with open(text_filename, mode='r', encoding='utf-8') as text_file:
        parsed_texts = json.loads(text_file.read())

    with open(embeddings_filename, mode='r') as embeddings_file:
        parsed_embeddings = json.loads(embeddings_file.read())

    return parsed_texts, parsed_embeddings
#docs_text, docs_embeddings = load_data()
def mot_cle(path):
    """Read a comma-separated keyword file and return the trimmed keywords.

    Every keyword is also printed, one per line, as a visual check.

    Args:
        path: Path of the text file; its whole content is split on commas.

    Returns:
        The list of entries with surrounding whitespace removed, in file
        order. Empty entries are kept (an empty file yields ``['']``).
    """
    with open(path, 'r') as keyword_file:
        raw_entries = keyword_file.read().split(',')

    keywords = []
    for entry in raw_entries:
        keyword = entry.strip()
        # Echo each keyword so the parsed file can be checked by eye.
        print(keyword)
        keywords.append(keyword)
    return keywords


# Fixed replies used when no indexed document is close enough to the question.
_OUT_OF_SCOPE_REPLY = (
    "As a chatbot for Djezzy, I can provide information exclusively about our "
    "affiliated companies. Unfortunately, I'm unable to respond to inquiries "
    "outside of that scope."
)
_UNCLEAR_QUESTION_REPLY = (
    "I apologize, I don't fully understand your question. You can contact our "
    "customer service for answers to your needs, or if you can provide more "
    "details, I would be happy to help."
)

# Instruction text shared by every context-based prompt. The odd spacing and
# wording are kept verbatim because they are part of the text sent to the LLM.
_ANSWER_RULES = (
    "each paraphrase in the context and Answer  the  question .\n"
    "do not take into consideration the paragraphs which have no relation to the question\n"
    " if there is not a paragraph that is related to the question, respond that for this question "
    "it's best to reach out to our customer service team . They'll be able to assist you with your needs\n"
    " just give me the answer I don't want any other details \n"
)

# Separator placed between the passages that make up the prompt context.
_CONTEXT_SEPARATOR = "\n---------------------\n"


def _detect_main_language(text):
    """Return the language code langdetect considers most probable for *text*."""
    candidates = detect_langs(text)
    return max(candidates, key=lambda candidate: candidate.prob).lang


def _translate_french_to_english(text):
    """Translate *text* to English with the module-level FLAN-T5 model."""
    input_ids = tokenizer1(f"translate french to English: {text}", return_tensors="pt").input_ids
    outputs = model1.generate(input_ids, max_length=100)
    decoded = tokenizer1.decode(outputs[0])
    print(decoded)
    # The decoded text still carries angle-bracket special tokens; strip
    # anything of the form <...> and the surrounding whitespace.
    return re.sub(r'<.*?>', '', decoded).strip()


def _find_keywords(query_text, mots_a_verifier):
    """Return the known keywords present in *query_text*, bigrams first.

    Matching is case-insensitive on whitespace-separated words. Two-word
    keywords are tried first; single-word matching is only used when no
    bigram matched. Keywords are returned with their original casing.
    """
    words = query_text.lower().split()
    bigrams = {' '.join(pair) for pair in zip(words, words[1:])}
    originals_by_lower = {mot.lower(): mot for mot in mots_a_verifier}

    found = [original for lowered, original in originals_by_lower.items() if lowered in bigrams]
    if not found:
        unigrams = set(words)
        found = [original for lowered, original in originals_by_lower.items() if lowered in unigrams]
    return found


def _rank_documents(query_text, docs_text, docs_embeddings):
    """Return ``(document, cosine_similarity)`` pairs, most similar first."""
    doc_matrix = np.asarray(docs_embeddings)
    if doc_matrix.size == 0:
        # Nothing to compare against; the caller falls back to a fixed reply.
        return []
    # One row per document, whatever the nesting of the stored embeddings.
    doc_matrix = doc_matrix.reshape(len(doc_matrix), -1)
    query_vector = np.asarray(embedding_llm.embed_query(query_text)).reshape(1, -1)
    scores = cosine_similarity(doc_matrix, query_vector).ravel().tolist()
    print(scores)
    # sorted() is stable, so equally similar documents keep their input order.
    return sorted(zip(docs_text, scores), key=lambda pair: pair[1], reverse=True)


def _build_keyword_prompt(q1, query_text, is_french, mots_trouves, vector_db):
    """Build the prompt from the best passage of each matched keyword's document."""
    passages = []
    for keyword in mots_trouves:
        hits = vector_db.similarity_search(query_text, k=1, filter={'document': keyword})
        # A keyword may have no indexed passage; skip it instead of failing
        # with IndexError on hits[0].
        if hits:
            passages.append(hits[0].page_content)
    context = _CONTEXT_SEPARATOR.join(passages)
    print(context)

    if is_french:
        return f" As Djezzy's chatbot\nread {_ANSWER_RULES} dont' mention that you used the provided context\n give me the  answer in french language  \n ###context:{context}\n  ###question: {q1}"
    return f" As Djezzy's chatbot\nread {_ANSWER_RULES} ###context:{context}\n  ###question: {query_text} "


def _build_similarity_prompt(q1, query_text, is_french, docs_text, docs_embeddings):
    """Build the prompt from embedding similarity, using tiered thresholds."""
    ranked = _rank_documents(query_text, docs_text, docs_embeddings)

    def docs_above(threshold):
        return [doc for doc, score in ranked if score > threshold]

    # Tier 1: a very close match -> answer from the single best document.
    strong = docs_above(0.72)
    if strong:
        context = _CONTEXT_SEPARATOR.join(strong[:1])
        print(context)
        if is_french:
            return f" As Djezzy's chatbot\nread {_ANSWER_RULES} dont' mention that you used the provided context\n translate the answer in french ,don't mention that you translate the answer\n   ###context:{context}\n  ###question: {q1} "
        return f"As Djezzy's chatbot\nread 3 times  {_ANSWER_RULES} differentiates between each price and gives the correct answer and does not distinguish between the offers of each price\n ###context:{context}\n {query_text}"

    # Tier 2: good matches -> answer from the two best documents.
    good = docs_above(0.65)
    if good:
        context = _CONTEXT_SEPARATOR.join(good[:2])
        print(context)
        if is_french:
            return f" As Djezzy's chatbot\nread {_ANSWER_RULES} dont' mention that you used the provided context\n translate the answer in french and write theme ,don't mention that you translate the answer\n   ###context:{context}\n  ###question: {query_text}"
        return f"  As Djezzy's chatbot\nread {_ANSWER_RULES} ###context:{context}\n  ###question: {query_text} "

    # Tier 3: loose matches -> up to four documents when at least three
    # qualify, otherwise only the best one.
    fair = docs_above(0.4)
    if fair:
        selected = fair[:4] if len(fair) >= 3 else fair[:1]
        context = _CONTEXT_SEPARATOR.join(selected)
        print(context)
        if is_french:
            return f"[INST] <<SYS>>\n As Djezzy's chatbot\nread {_ANSWER_RULES} dont' mention that you used the provided context\n translate the answer in french and write theme ,don't mention that you translate the answer \n   ###context:{context}<</SYS>>\n\n  ###question: {query_text} [/INST]"
        return f"As Djezzy's chatbot\nread {_ANSWER_RULES} dont' mention that you used the provided context  ###context:{context}\n  ###question: {query_text} "

    # Tier 4: something vaguely related exists -> ask the user for details.
    if any(0.3 <= score < 0.4 for _, score in ranked):
        print(_UNCLEAR_QUESTION_REPLY)
        if is_french:
            # The French variant previously embedded the out-of-scope reply
            # here (copy-paste slip); it now matches the English variant.
            return f"for this question translate this answer in frensh  and write theme,don't add anything and don't mention that you translate the answer :{_UNCLEAR_QUESTION_REPLY}"
        return f"for this question write this answer and don't add anything: {_UNCLEAR_QUESTION_REPLY}"

    # Tier 5: nothing related -> the question is out of scope.
    print(_OUT_OF_SCOPE_REPLY)
    if is_french:
        return f"for this question translate this answer in frensh  and write theme , don't add anything and don't mention that you translate the answer :{_OUT_OF_SCOPE_REPLY}"
    return f" for this question write this answer and don't add anything  :{_OUT_OF_SCOPE_REPLY}"


def pip(question, docs_text, docs_embeddings, mots_a_verifier, vector_db):
    """Build the LLM prompt used to answer *question*.

    Steps:
      1. Detect the question language; French questions are translated to
         English for retrieval.
      2. Look for known keywords in the (translated) question, two-word
         keywords first, then single words.
      3. If keywords are found, fetch the best passage of each keyword's
         document from ``vector_db``. Otherwise rank ``docs_text`` by cosine
         similarity against ``docs_embeddings`` and pick a context according
         to tiered thresholds (0.72 / 0.65 / 0.4 / 0.3), falling back to a
         fixed reply when nothing is similar enough.

    Progress information (language, translated question, scores, context) is
    printed to stdout.

    Args:
        question: The user's question, in English or French.
        docs_text: Document texts, aligned with ``docs_embeddings``.
        docs_embeddings: One embedding per document (nested lists or array).
        mots_a_verifier: Known keywords; each is used as the ``'document'``
            metadata filter of the vector search.
        vector_db: Vector store exposing
            ``similarity_search(query, k=..., filter=...)``.

    Returns:
        The prompt string to send to the language model.

    Raises:
        Errors from language detection, translation, embedding or the vector
        search propagate unchanged (langdetect, for instance, rejects text it
        cannot extract features from).
    """
    q1 = question
    print(q1)
    lang = _detect_main_language(question)
    print(lang)

    is_french = lang == 'fr'
    query_text = _translate_french_to_english(question) if is_french else question
    print(query_text)

    # Keyword routing applies to bigram and single-word matches alike.
    # Previously a bigram match skipped all retrieval and the function ended
    # in UnboundLocalError on `prompt`.
    mots_trouves = _find_keywords(query_text, mots_a_verifier)
    if mots_trouves:
        return _build_keyword_prompt(q1, query_text, is_french, mots_trouves, vector_db)
    return _build_similarity_prompt(q1, query_text, is_french, docs_text, docs_embeddings)