tarekfer8 committed on
Commit
a8f4003
·
verified ·
1 Parent(s): b6f80fb

Update djezzy.py

Browse files
Files changed (1) hide show
  1. djezzy.py +58 -0
djezzy.py CHANGED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ %%capture
2
+ !pip install -U bitsandbytes
3
+ !pip install -U peft
4
+ !pip install -U accelerate
5
+ !pip install -U trl
6
+ !pip install datasets==2.16.0
7
+ !pip install sentencepiece
8
+ !pip install -q -U git+https://github.com/huggingface/transformers.git
9
+ !pip install "torch>=2.1.1" -U
10
+ !pip install accelerate
11
+ !pip install -q python-dotenv==1.0.0
12
+
13
+ !pip install -q pandas==1.5.3
14
+ !pip install -q unstructured==0.7.12
15
+ !pip install -q wikipedia==1.4.0
16
+ !pip install -q pypdf==3.12.0
17
+ !pip install -q jq==1.4.1
18
+ !pip install -q nltk==3.8.1
19
+ !pip install -q tiktoken==0.4.0
20
+ !pip install -q sentencepiece==0.1.99
21
+ !pip install -q sentence-transformers==2.2.2
22
+ !pip install -q cohere==4.11.2
23
+ !pip install -q faiss-cpu==1.7.4
24
+ !pip install -q chromadb==0.3.26
25
+ !pip install transformers
26
+ !pip install langchain
27
+ !pip install langchain-community
28
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig,HfArgumentParser,TrainingArguments,pipeline, logging
29
+ from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
30
+ import os,torch
31
+ from datasets import load_dataset
32
+ from trl import SFTTrainer
33
+ import pandas as pd
34
+ import pyarrow as pa
35
+ import pyarrow.dataset as ds
36
+ from datasets import Dataset
37
+ import re
38
+ import pandas as pd
39
+ import os
40
+
41
# Load the comma-separated keyword list from disk.
# The encoding is passed explicitly so decoding does not depend on the
# platform's default locale; this assumes the file is saved as UTF-8
# (TODO confirm against the actual file).
with open('/content/mots_clés.txt', 'r', encoding='utf-8') as fichier:
    contenu = fichier.read()

# Split the raw text into keywords, using the comma as the separator.
mots = contenu.split(',')

# Print each keyword (whitespace-stripped) as a quick visual check.
for mot in mots:
    print(mot.strip())

# Store the whitespace-stripped keywords in a list.
tableau_de_mots = [mot.strip() for mot in mots]
53
+
54
# Sentence-embedding setup.
# The integration classes are shipped in `langchain-community` (installed
# above); importing them through `langchain.embeddings` is the legacy
# location. Prefer the community package and fall back to the legacy path
# so environments with an older langchain keep working.
try:
    from langchain_community.embeddings import SentenceTransformerEmbeddings
except ImportError:
    from langchain.embeddings import SentenceTransformerEmbeddings

# Hugging Face model identifier passed to the embedding wrapper.
model_name = "sentence-transformers/all-MiniLM-L6-v2"

# Embedding object built from the model identifier above.
embedding_llm = SentenceTransformerEmbeddings(model_name=model_name)