xavierbarbier committed
Commit 4c8a6f3 · verified · 1 Parent(s): 28dbce4

Update app.py

Files changed (1)
  1. app.py +32 -13
app.py CHANGED
@@ -5,6 +5,8 @@ import faiss
#from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_huggingface import HuggingFaceEmbeddings
import numpy as np
+from pypdf import PdfReader
+

title = "Mistral-7B-Instruct-GGUF Run On CPU-Basic Free Hardware"

@@ -26,6 +28,34 @@ hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.1-GGUF", filename=model

print("Start the model init process")
model = GPT4All(model_name, model_path, allow_download = False, device="cpu")
+
+
+
+# creating a pdf reader object
+reader = PdfReader("resourse/NGAP 01042024.pdf")
+text = []
+for p in np.arange(0, len(reader.pages), 1):
+    page = reader.pages[int(p)]
+
+    # extracting text from page
+    text.append(page.extract_text())
+
+text = ' '.join(text)
+
+chunk_size = 2048
+chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
+
+def get_text_embedding(text):
+    return embeddings.embed_query(text)
+
+text_embeddings = np.array([get_text_embedding(chunk) for chunk in chunks])
+
+d = text_embeddings.shape[1]
+index = faiss.IndexFlatL2(d)
+index.add(text_embeddings)
+
+
+
print("Finish the model init process")

model.config["promptTemplate"] = "[INST] {0} [/INST]"
@@ -44,28 +74,17 @@ embeddings = HuggingFaceEmbeddings(

#index = faiss.load_local("resourse/embeddings_ngap.faiss")

-def get_text_embedding(text):
-    return embeddings.embed_query(text)

def generater(message, history, temperature, top_p, top_k):
    prompt = "<s>"
    for user_message, assistant_message in history:
        prompt += model.config["promptTemplate"].format(user_message)
-
-        question = prompt
-        question_embeddings = np.array([get_text_embedding(prompt)])
-        D, I = index.search(question_embeddings, k=2) # distance, index
-        retrieved_chunk = [chunks[i] for i in I.tolist()[0]]
-

-        prompt += assistant_message + " Contexte:" + retrieved_chunk + "</s>"
+        prompt += assistant_message + "</s>"
    prompt += model.config["promptTemplate"].format(message)
-
-
-
-
+
    outputs = []
    for token in model.generate(prompt=prompt, temp=temperature, top_k = top_k, top_p = top_p, max_tokens = max_new_tokens, streaming=True):
        outputs.append(token)
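
The bulk of the change is the new startup block in the second hunk: read the PDF with pypdf, join the page texts, split them into fixed-size character chunks, embed each chunk, and add the vectors to a flat L2 FAISS index. Below is a condensed sketch of that pipeline, not the committed code: the HuggingFaceEmbeddings arguments are cut off in the diff, so the bare constructor is a placeholder, and the float32 cast is added because FAISS indexes only accept float32 input.

import faiss
import numpy as np
from pypdf import PdfReader
from langchain_huggingface import HuggingFaceEmbeddings

def build_index(pdf_path, chunk_size=2048):
    # Extract and concatenate the text of every page.
    reader = PdfReader(pdf_path)
    text = " ".join(page.extract_text() for page in reader.pages)
    # Fixed-size character chunks with no overlap, as in the commit.
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
    # Embed each chunk and index the vectors for exact L2 search.
    embedder = HuggingFaceEmbeddings()  # placeholder args; the diff truncates them
    vectors = np.array([embedder.embed_query(c) for c in chunks], dtype="float32")
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return index, chunks

embed_documents(chunks) would batch the embedding calls, but the sketch keeps the per-chunk embed_query loop from the diff. Note that the third hunk shows embeddings = HuggingFaceEmbeddings( still being defined further down the new file (just before line 74), after the added block at lines 31-58 has already called embed_query, so as committed the startup block would raise a NameError.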
 
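The old inline retrieval in generater is deleted without being moved anywhere visible in this diff, so after this commit the index is built but never searched. For reference, a sketch of the removed lookup rewritten as a standalone function; the names mirror the deleted lines, and k=2 matches the old call:

import numpy as np

def retrieve_context(question, index, chunks, embed_fn, k=2):
    # FAISS search expects a 2-D float32 array, one row per query.
    q = np.array([embed_fn(question)], dtype="float32")
    D, I = index.search(q, k)  # distances and indices of the k nearest chunks
    return [chunks[i] for i in I[0]]

Usage would be something like " ".join(retrieve_context(message, index, chunks, embeddings.embed_query)) before building the prompt.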
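The third hunk simplifies the chat loop: each past turn is now closed with assistant_message + "</s>" instead of the retrieved context. A standalone sketch of the prompt this produces, assuming history is a list of (user, assistant) pairs as in the loop, and using the [INST] template set on model.config above:

PROMPT_TEMPLATE = "[INST] {0} [/INST]"

def build_prompt(message, history):
    prompt = "<s>"
    for user_message, assistant_message in history:
        # Each past exchange: instruction-wrapped user turn, then the reply.
        prompt += PROMPT_TEMPLATE.format(user_message)
        prompt += assistant_message + "</s>"
    # The new user message goes last, left open for the model to answer.
    prompt += PROMPT_TEMPLATE.format(message)
    return prompt

For example, build_prompt("Et ensuite ?", [("Bonjour", "Bonjour !")]) returns '<s>[INST] Bonjour [/INST]Bonjour !</s>[INST] Et ensuite ? [/INST]'.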
 
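Generation itself is untouched: model.generate is called with streaming=True, which in the gpt4all package returns a generator of text fragments. The diff cuts off after outputs.append(token); a typical continuation, sketched here rather than taken from the commit, yields the accumulated text so the UI can render a growing reply (max_new_tokens is defined elsewhere in app.py; 1024 below is a stand-in):

def stream_reply(model, prompt, temperature, top_p, top_k, max_tokens=1024):
    outputs = []
    for token in model.generate(prompt=prompt, temp=temperature,
                                top_k=top_k, top_p=top_p,
                                max_tokens=max_tokens, streaming=True):
        outputs.append(token)
        yield "".join(outputs)  # partial response after each new fragment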