aizanlabs committed on
Commit 907f11e · verified · 1 Parent(s): 415e9cd

Update app.py

Files changed (1)
  1. app.py +3 -93
app.py CHANGED
@@ -1,5 +1,3 @@
-"Single Thread"
-
 import os
 import multiprocessing
 import concurrent.futures
@@ -20,7 +18,7 @@ class DocumentRetrievalAndGeneration:
     def __init__(self, embedding_model_name, lm_model_id, data_folder):
         self.all_splits = self.load_documents(data_folder)
         self.embeddings = SentenceTransformer(embedding_model_name)
-        self.gpu_index = self.create_faiss_index()
+        self.cpu_index = self.create_faiss_index()
         self.llm = self.initialize_llm(lm_model_id)
 
     def load_documents(self, folder_path):
@@ -30,8 +28,6 @@ class DocumentRetrievalAndGeneration:
         all_splits = text_splitter.split_documents(documents)
         print('Length of documents:', len(documents))
         print("LEN of all_splits", len(all_splits))
-        # for i in range(5):
-        #     print(all_splits[i].page_content)
         return all_splits
 
     def create_faiss_index(self):
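For orientation, `load_documents` (unchanged here apart from the deleted debug loop) is consistent with a LangChain loader-plus-splitter pattern. The sketch below is written under that assumption: only `text_splitter.split_documents(documents)` is visible in the hunk, and the loader class and chunk sizes are made up for illustration.

```python
# Assumption: LangChain loader/splitter; only split_documents() appears in this diff.
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def load_documents(folder_path):
    # Load every .txt file in the folder (the app points this at 'text_files').
    documents = DirectoryLoader(folder_path, glob="*.txt", loader_cls=TextLoader).load()
    # Chunk sizes are illustrative, not taken from the commit.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    all_splits = text_splitter.split_documents(documents)
    print('Length of documents:', len(documents))
    print("LEN of all_splits", len(all_splits))
    return all_splits
```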
@@ -39,9 +35,7 @@ class DocumentRetrievalAndGeneration:
         embeddings = self.embeddings.encode(all_texts, convert_to_tensor=True).cpu().numpy()
         index = faiss.IndexFlatL2(embeddings.shape[1])
         index.add(embeddings)
-        gpu_resource = faiss.StandardGpuResources()
-        gpu_index = faiss.index_cpu_to_gpu(gpu_resource, 0, index)
-        return gpu_index
+        return index
 
     def initialize_llm(self, model_id):
         bnb_config = BitsAndBytesConfig(
@@ -75,7 +69,7 @@ class DocumentRetrievalAndGeneration:
 
     def query_and_generate_response(self, query):
         query_embedding = self.embeddings.encode(query, convert_to_tensor=True).cpu().numpy()
-        distances, indices = self.gpu_index.search(np.array([query_embedding]), k=5)
+        distances, indices = self.cpu_index.search(np.array([query_embedding]), k=5)
 
         content = ""
         for idx in indices[0]:
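Taken together, the `__init__`, `create_faiss_index`, and search changes above drop the GPU-only FAISS calls (`StandardGpuResources`, `index_cpu_to_gpu`) and keep the flat L2 index on the CPU, which the plain `faiss-cpu` package supports. A minimal sketch of the retrieval path the file ends up with; the corpus strings are made up, and the model id is the one used in this commit:

```python
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Stand-in corpus; the app indexes chunks split from the 'text_files' folder.
texts = ["VIP and CSI2 module notes...", "CCS floating point support notes..."]
model = SentenceTransformer('flax-sentence-embeddings/all_datasets_v3_MiniLM-L12')

# Build the flat L2 index on the CPU; no faiss.StandardGpuResources() /
# faiss.index_cpu_to_gpu() call, so the plain faiss-cpu wheel is enough.
embeddings = model.encode(texts, convert_to_tensor=True).cpu().numpy()
cpu_index = faiss.IndexFlatL2(embeddings.shape[1])
cpu_index.add(embeddings)

# Search is the same call on a CPU index: k nearest chunks by L2 distance
# (k may exceed this toy corpus size; FAISS pads missing hits with -1).
query_embedding = model.encode("example query", convert_to_tensor=True).cpu().numpy()
distances, indices = cpu_index.search(np.array([query_embedding]), k=5)
print(indices[0], distances[0])
```

On hardware without a CUDA GPU, `faiss.StandardGpuResources` is simply absent from the `faiss-cpu` build, which is the likely motivation for this part of the +3/-93 cleanup.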
@@ -113,20 +107,15 @@ class DocumentRetrievalAndGeneration:
             Solution:"NO SOLUTION AVAILABLE"
             </s>
             """
-        # prompt = f"Query: {query}\nSolution: {content}\n"
 
-        # Encode and prepare inputs
         messages = [{"role": "user", "content": prompt}]
         encodeds = self.llm.tokenizer.apply_chat_template(messages, return_tensors="pt")
         model_inputs = encodeds.to(self.llm.device)
 
-        # Perform inference and measure time
         start_time = datetime.now()
         generated_ids = self.generate_response_with_timeout(model_inputs)
-        # generated_ids = self.llm.model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
         elapsed_time = datetime.now() - start_time
 
-        # Decode and return output
         decoded = self.llm.tokenizer.batch_decode(generated_ids)
         generated_response = decoded[0]
         match1 = re.search(r'\[/INST\](.*?)</s>', generated_response, re.DOTALL)
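For reference, the encode-generate-decode flow kept by this hunk, reduced to a standalone sketch. The direct `model.generate` call mirrors the commented-out line deleted above and stands in for the app's own `generate_response_with_timeout` wrapper; the loading kwargs are illustrative and omit the app's 4-bit `BitsAndBytesConfig`:

```python
from datetime import datetime
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "mistralai/Mistral-7B-Instruct-v0.2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

# apply_chat_template wraps the prompt in Mistral's [INST] ... [/INST] format.
messages = [{"role": "user", "content": "example query plus retrieved context"}]
model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)

# Time the generation step, as query_and_generate_response does.
start_time = datetime.now()
generated_ids = model.generate(model_inputs, max_new_tokens=1000, do_sample=True)
elapsed_time = datetime.now() - start_time

decoded = tokenizer.batch_decode(generated_ids)
print(decoded[0], elapsed_time)
```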
@@ -134,13 +123,10 @@ class DocumentRetrievalAndGeneration:
         match2 = re.search(r'Solution:(.*?)</s>', generated_response, re.DOTALL | re.IGNORECASE)
         if match1:
             solution_text = match1.group(1).strip()
-            print(solution_text)
             if "Solution:" in solution_text:
                 solution_text = solution_text.split("Solution:", 1)[1].strip()
         elif match2:
             solution_text = match2.group(1).strip()
-            print(solution_text)
-
         else:
             solution_text=generated_response
         print("Generated response:", generated_response)
@@ -154,77 +140,12 @@ class DocumentRetrievalAndGeneration:
         return response
 
 if __name__ == "__main__":
-    # Example usage
     embedding_model_name = 'flax-sentence-embeddings/all_datasets_v3_MiniLM-L12'
     lm_model_id = "mistralai/Mistral-7B-Instruct-v0.2"
     data_folder = 'text_files'
 
     doc_retrieval_gen = DocumentRetrievalAndGeneration(embedding_model_name, lm_model_id, data_folder)
 
-    # """Dual Interface"""
-
-    # def launch_interface():
-    #     css_code = """
-    #         .gradio-container {
-    #             background-color: #daccdb;
-    #         }
-    #         /* Button styling for all buttons */
-    #         button {
-    #             background-color: #927fc7; /* Default color for all other buttons */
-    #             color: black;
-    #             border: 1px solid black;
-    #             padding: 10px;
-    #             margin-right: 10px;
-    #             font-size: 16px; /* Increase font size */
-    #             font-weight: bold; /* Make text bold */
-    #         }
-    #     """
-    #     EXAMPLES = [
-    #         "On which devices can the VIP and CSI2 modules operate simultaneously?",
-    #         "I'm using Code Composer Studio 5.4.0.00091 and enabled FPv4SPD16 floating point support for CortexM4 in TDA2. However, after building the project, the .asm file shows --float_support=vfplib instead of FPv4SPD16. Why is this happening?",
-    #         "Could you clarify the maximum number of cameras that can be connected simultaneously to the video input ports on the TDA2x SoC, considering it supports up to 10 multiplexed input ports and includes 3 dedicated video input modules?"
-    #     ]
-
-    #     file_path = "ticketNames.txt"
-
-    #     # Read the file content
-    #     with open(file_path, "r") as file:
-    #         content = file.read()
-    #     ticket_names = json.loads(content)
-    #     dropdown = gr.Dropdown(label="Sample queries", choices=ticket_names)
-
-    #     # Define Gradio interfaces
-    #     tab1 = gr.Interface(
-    #         fn=doc_retrieval_gen.qa_infer_gradio,
-    #         inputs=[gr.Textbox(label="QUERY", placeholder="Enter your query here")],
-    #         allow_flagging='never',
-    #         examples=EXAMPLES,
-    #         cache_examples=False,
-    #         outputs=[gr.Textbox(label="SOLUTION"), gr.Textbox(label="RELATED QUERIES")],
-    #         css=css_code
-    #     )
-    #     tab2 = gr.Interface(
-    #         fn=doc_retrieval_gen.qa_infer_gradio,
-    #         inputs=[dropdown],
-    #         allow_flagging='never',
-    #         outputs=[gr.Textbox(label="SOLUTION"), gr.Textbox(label="RELATED QUERIES")],
-    #         css=css_code
-    #     )
-
-    #     # Combine interfaces into a tabbed interface
-    #     gr.TabbedInterface(
-    #         [tab1, tab2],
-    #         ["Textbox Input", "FAQs"],
-    #         title="TI E2E FORUM",
-    #         css=css_code
-    #     ).launch(debug=True)
-
-    # # Launch the interface
-    # launch_interface()
-
-
-
-    """Single Interface"""
     def launch_interface():
         css_code = """
             .gradio-container {
@@ -245,15 +166,6 @@ if __name__ == "__main__":
             "I'm using Code Composer Studio 5.4.0.00091 and enabled FPv4SPD16 floating point support for CortexM4 in TDA2. However, after building the project, the .asm file shows --float_support=vfplib instead of FPv4SPD16. Why is this happening?",
             "Could you clarify the maximum number of cameras that can be connected simultaneously to the video input ports on the TDA2x SoC, considering it supports up to 10 multiplexed input ports and includes 3 dedicated video input modules?"]
 
-        # file_path = "ticketNames.txt"
-
-        # # Read the file content
-        # with open(file_path, "r") as file:
-        #     content = file.read()
-        # ticket_names = json.loads(content)
-        # dropdown = gr.Dropdown(label="Sample queries", choices=ticket_names)
-
-        # Define Gradio interface
         interface = gr.Interface(
             fn=doc_retrieval_gen.qa_infer_gradio,
             inputs=[gr.Textbox(label="QUERY", placeholder="Enter your query here")],
@@ -264,8 +176,6 @@ if __name__ == "__main__":
             css=css_code
         )
 
-        # Launch Gradio interface
         interface.launch(debug=True)
 
-    # Launch the interface
    launch_interface()
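After this commit only the single-textbox interface remains (the commented-out dual/tabbed variant is deleted). Stripped of the retrieval pipeline, the surviving UI wiring is roughly the following sketch, using the Gradio 4-era keywords that appear in the file; `qa_infer_gradio` here is a stub for the class method of the same name:

```python
import gradio as gr

# Stub standing in for DocumentRetrievalAndGeneration.qa_infer_gradio,
# which returns (solution, related_queries) as two strings.
def qa_infer_gradio(query):
    return f"Answer for: {query}", "related query 1\nrelated query 2"

css_code = ".gradio-container { background-color: #daccdb; }"
EXAMPLES = ["On which devices can the VIP and CSI2 modules operate simultaneously?"]

interface = gr.Interface(
    fn=qa_infer_gradio,
    inputs=[gr.Textbox(label="QUERY", placeholder="Enter your query here")],
    outputs=[gr.Textbox(label="SOLUTION"), gr.Textbox(label="RELATED QUERIES")],
    allow_flagging='never',
    examples=EXAMPLES,
    cache_examples=False,
    css=css_code,
)
interface.launch(debug=True)
```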
 
 
 