tarekfer8 committed on
Commit
d236dca
·
verified ·
1 Parent(s): 38f4aec

Update djezzy.py

Browse files
Files changed (1) hide show
  1. djezzy.py +30 -2
djezzy.py CHANGED
@@ -4,7 +4,9 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
4
 
5
  import os
6
  from datasets import load_dataset
7
-
 
 
8
  import pandas as pd
9
  import pyarrow as pa
10
  import pyarrow.dataset as ds
@@ -23,6 +25,11 @@ hh_source='index.pkl'
23
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
24
 
25
  embedding_llm = SentenceTransformerEmbeddings(model_name=model_name)
 
 
 
 
 
26
  with tempfile.TemporaryDirectory() as temp_dir:
27
  # Chemins des fichiers cibles dans le répertoire temporaire
28
  index_target = os.path.join(temp_dir, 'index.faiss')
@@ -69,6 +76,23 @@ def mot_cle(path):
69
 
70
  def pip(question,docs_text, docs_embeddings,mots_a_verifier,vector_db):
71
  query_text = question
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  query_embedding = embedding_llm.embed_query(query_text)
73
  query_embedding_array = np.array(query_embedding)
74
  docs_embeddings=np.array(docs_embeddings)
@@ -106,10 +130,14 @@ def pip(question,docs_text, docs_embeddings,mots_a_verifier,vector_db):
106
  if not similar_docsA:
107
  print("As a chatbot for Djezzy, I can provide information exclusively about our affiliated companies. Unfortunately, I'm unable to respond to inquiries outside of that scope.")
108
  prompt=" for this question write this answer and don't add anything :As a chatbot for Djezzy, I can provide information exclusively about our affiliated companies. Unfortunately, I'm unable to respond to inquiries outside of that scope."
109
-
 
110
  else:
111
  print("I apologize, I don't fully understand your question. You can contact our customer service for answers to your needs, or if you can provide more details, I would be happy to help.")
112
  prompt="for this question write this answer and don't add anything: I apologize, I don't fully understand your question. You can contact our customer service for answers to your needs, or if you can provide more details, I would be happy to help."
 
 
 
113
 
114
  else:
115
  context="\n---------------------\n".join([doc for doc,_ in similar_docs[:4]]if len(similar_docs) >=3 else [doc for doc, _ in similar_docs[:1]])
 
4
 
5
  import os
6
  from datasets import load_dataset
7
+ from langdetect import detect
8
+ from langdetect import detect_langs
9
+ from langdetect import DetectorFactory
10
  import pandas as pd
11
  import pyarrow as pa
12
  import pyarrow.dataset as ds
 
25
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
26
 
27
  embedding_llm = SentenceTransformerEmbeddings(model_name=model_name)
28
+
29
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
30
+
31
+ tokenizer1 = T5Tokenizer.from_pretrained("google/flan-t5-xl")
32
+ model1 = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl", device_map=device_map, load_in_8bit=True)
33
  with tempfile.TemporaryDirectory() as temp_dir:
34
  # Chemins des fichiers cibles dans le répertoire temporaire
35
  index_target = os.path.join(temp_dir, 'index.faiss')
 
76
 
77
  def pip(question,docs_text, docs_embeddings,mots_a_verifier,vector_db):
78
  query_text = question
79
+ detected_languages=detect_langs(question)
80
+ main_language = max(detected_languages, key=lambda lang: lang.prob)
81
+ lang = main_language.lang
82
+
83
+
84
+
85
+ if lang=='fr':
86
+ input_text = f"translate french to English: {query_text}"
87
+ input_ids = tokenizer1(input_text, return_tensors="pt").input_ids
88
+
89
+ outputs = model1.generate(input_ids,max_length = 100)
90
+ print(tokenizer1.decode(outputs[0]))
91
+ text=tokenizer1.decode(outputs[0])
92
+ cleaned_text = re.sub(r'<.*?>', '', text) # Supprime les balises HTML
93
+ cleaned_text = cleaned_text.strip() # Enlève les espaces de début et de fin
94
+ query_text=cleaned_text
95
+
96
  query_embedding = embedding_llm.embed_query(query_text)
97
  query_embedding_array = np.array(query_embedding)
98
  docs_embeddings=np.array(docs_embeddings)
 
130
  if not similar_docsA:
131
  print("As a chatbot for Djezzy, I can provide information exclusively about our affiliated companies. Unfortunately, I'm unable to respond to inquiries outside of that scope.")
132
  prompt=" for this question write this answer and don't add anything :As a chatbot for Djezzy, I can provide information exclusively about our affiliated companies. Unfortunately, I'm unable to respond to inquiries outside of that scope."
133
+ if lang=='fr':
134
+ prompt="pour cette question écrivez cette réponse et n'ajoutez rien :En tant que chatbot pour Djezzy, je peux fournir des informations exclusivement sur nos sociétés affiliées. Malheureusement, je ne suis pas en mesure de répondre aux demandes en dehors de ce cadre."
135
  else:
136
  print("I apologize, I don't fully understand your question. You can contact our customer service for answers to your needs, or if you can provide more details, I would be happy to help.")
137
  prompt="for this question write this answer and don't add anything: I apologize, I don't fully understand your question. You can contact our customer service for answers to your needs, or if you can provide more details, I would be happy to help."
138
+ if lang=='fr':
139
+ prompt="pour cette question écrivez cette réponse et n'ajoutez rien:Je m'excuse, je ne comprends pas bien votre question. Vous pouvez contacter notre service client pour obtenir des réponses à vos besoins, ou si vous pouvez fournir plus de détails, je serai heureux de vous aider."
140
+
141
 
142
  else:
143
  context="\n---------------------\n".join([doc for doc,_ in similar_docs[:4]]if len(similar_docs) >=3 else [doc for doc, _ in similar_docs[:1]])