tarekfer8 committed on
Commit
d236dca
·
verified ·
1 Parent(s): 38f4aec

Update djezzy.py

Browse files
Files changed (1) hide show
  1. djezzy.py +30 -2
djezzy.py CHANGED
@@ -4,7 +4,9 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
4
 
5
  import os
6
  from datasets import load_dataset
7
-
 
 
8
  import pandas as pd
9
  import pyarrow as pa
10
  import pyarrow.dataset as ds
@@ -23,6 +25,11 @@ hh_source='index.pkl'
23
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
24
 
25
  embedding_llm = SentenceTransformerEmbeddings(model_name=model_name)
 
 
 
 
 
26
  with tempfile.TemporaryDirectory() as temp_dir:
27
  # Chemins des fichiers cibles dans le répertoire temporaire
28
  index_target = os.path.join(temp_dir, 'index.faiss')
@@ -69,6 +76,23 @@ def mot_cle(path):
69
 
70
  def pip(question,docs_text, docs_embeddings,mots_a_verifier,vector_db):
71
  query_text = question
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  query_embedding = embedding_llm.embed_query(query_text)
73
  query_embedding_array = np.array(query_embedding)
74
  docs_embeddings=np.array(docs_embeddings)
@@ -106,10 +130,14 @@ def pip(question,docs_text, docs_embeddings,mots_a_verifier,vector_db):
106
  if not similar_docsA:
107
  print("As a chatbot for Djezzy, I can provide information exclusively about our affiliated companies. Unfortunately, I'm unable to respond to inquiries outside of that scope.")
108
  prompt=" for this question write this answer and don't add anything :As a chatbot for Djezzy, I can provide information exclusively about our affiliated companies. Unfortunately, I'm unable to respond to inquiries outside of that scope."
109
-
 
110
  else:
111
  print("I apologize, I don't fully understand your question. You can contact our customer service for answers to your needs, or if you can provide more details, I would be happy to help.")
112
  prompt="for this question write this answer and don't add anything: I apologize, I don't fully understand your question. You can contact our customer service for answers to your needs, or if you can provide more details, I would be happy to help."
 
 
 
113
 
114
  else:
115
  context="\n---------------------\n".join([doc for doc,_ in similar_docs[:4]]if len(similar_docs) >=3 else [doc for doc, _ in similar_docs[:1]])
 
4
 
5
  import os
6
  from datasets import load_dataset
7
+ from langdetect import detect
8
+ from langdetect import detect_langs
9
+ from langdetect import DetectorFactory
10
  import pandas as pd
11
  import pyarrow as pa
12
  import pyarrow.dataset as ds
 
25
  model_name = "sentence-transformers/all-MiniLM-L6-v2"
26
 
27
  embedding_llm = SentenceTransformerEmbeddings(model_name=model_name)
28
+
29
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
30
+
31
+ tokenizer1 = T5Tokenizer.from_pretrained("google/flan-t5-xl")
32
+ model1 = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl", device_map=device_map, load_in_8bit=True)
33
  with tempfile.TemporaryDirectory() as temp_dir:
34
  # Chemins des fichiers cibles dans le répertoire temporaire
35
  index_target = os.path.join(temp_dir, 'index.faiss')
 
76
 
77
  def pip(question,docs_text, docs_embeddings,mots_a_verifier,vector_db):
78
  query_text = question
79
+ detected_languages=detect_langs(question)
80
+ main_language = max(detected_languages, key=lambda lang: lang.prob)
81
+ lang = main_language.lang
82
+
83
+
84
+
85
+ if lang=='fr':
86
+ input_text = f"translate french to English: {query_text}"
87
+ input_ids = tokenizer1(input_text, return_tensors="pt").input_ids
88
+
89
+ outputs = model1.generate(input_ids,max_length = 100)
90
+ print(tokenizer1.decode(outputs[0]))
91
+ text=tokenizer1.decode(outputs[0])
92
+ cleaned_text = re.sub(r'<.*?>', '', text) # Supprime les balises HTML
93
+ cleaned_text = cleaned_text.strip() # Enlève les espaces de début et de fin
94
+ query_text=cleaned_text
95
+
96
  query_embedding = embedding_llm.embed_query(query_text)
97
  query_embedding_array = np.array(query_embedding)
98
  docs_embeddings=np.array(docs_embeddings)
 
130
  if not similar_docsA:
131
  print("As a chatbot for Djezzy, I can provide information exclusively about our affiliated companies. Unfortunately, I'm unable to respond to inquiries outside of that scope.")
132
  prompt=" for this question write this answer and don't add anything :As a chatbot for Djezzy, I can provide information exclusively about our affiliated companies. Unfortunately, I'm unable to respond to inquiries outside of that scope."
133
+ if lang=='fr':
134
+ prompt="pour cette question écrivez cette réponse et n'ajoutez rien :En tant que chatbot pour Djezzy, je peux fournir des informations exclusivement sur nos sociétés affiliées. Malheureusement, je ne suis pas en mesure de répondre aux demandes en dehors de ce cadre."
135
  else:
136
  print("I apologize, I don't fully understand your question. You can contact our customer service for answers to your needs, or if you can provide more details, I would be happy to help.")
137
  prompt="for this question write this answer and don't add anything: I apologize, I don't fully understand your question. You can contact our customer service for answers to your needs, or if you can provide more details, I would be happy to help."
138
+ if lang=='fr':
139
+ prompt="pour cette question écrivez cette réponse et n'ajoutez rien:Je m'excuse, je ne comprends pas bien votre question. Vous pouvez contacter notre service client pour obtenir des réponses à vos besoins, ou si vous pouvez fournir plus de détails, je serai heureux de vous aider."
140
+
141
 
142
  else:
143
  context="\n---------------------\n".join([doc for doc,_ in similar_docs[:4]]if len(similar_docs) >=3 else [doc for doc, _ in similar_docs[:1]])