arjunanand13 committed on
Commit
077ef59
·
verified ·
1 Parent(s): 5726d0d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -10
app.py CHANGED
@@ -13,17 +13,20 @@ import gradio as gr
13
 
14
  class DocumentRetrievalAndGeneration:
15
  def __init__(self, embedding_model_name, lm_model_id, data_folder, faiss_index_path):
16
- self.documents = self.load_documents(data_folder)
17
  self.embeddings = SentenceTransformer(embedding_model_name)
18
  self.gpu_index = self.load_faiss_index(faiss_index_path)
19
  self.llm = self.initialize_llm(lm_model_id)
20
- self.all_splits = self.split_documents()
21
 
22
  def load_documents(self, folder_path):
23
  loader = DirectoryLoader(folder_path, loader_cls=TextLoader)
 
24
  documents = loader.load()
 
25
  print('Length of documents:', len(documents))
26
- return documents
 
27
 
28
  def load_faiss_index(self, faiss_index_path):
29
  cpu_index = faiss.read_index(faiss_index_path)
@@ -51,12 +54,6 @@ class DocumentRetrievalAndGeneration:
51
  )
52
  return generate_text
53
 
54
- def split_documents(self):
55
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=250)
56
- all_splits = text_splitter.split_documents(self.documents)
57
- print("LEN of all_splits", len(all_splits))
58
- return all_splits
59
-
60
  def query_and_generate_response(self, query):
61
  query_embedding = self.embeddings.encode(query, convert_to_tensor=True).cpu().numpy()
62
  distances, indices = self.gpu_index.search(np.array([query_embedding]), k=5)
@@ -65,6 +62,7 @@ class DocumentRetrievalAndGeneration:
65
  for idx in indices[0]:
66
  content += "-" * 50 + "\n"
67
  content += self.all_splits[idx].page_content + "\n"
 
68
  print(self.all_splits[idx].page_content)
69
  print("############################")
70
  prompt=f"""
@@ -136,7 +134,7 @@ if __name__ == "__main__":
136
  font-weight: bold; /* Make text bold */
137
  }
138
  """
139
- EXAMPLES = ["Does the VIP modules & CSI2 module could work simultaneously? ",
140
  "I'm using Code Composer Studio 5.4.0.00091 and enabled FPv4SPD16 floating point support for CortexM4 in TDA2. However, after building the project, the .asm file shows --float_support=vfplib instead of FPv4SPD16. Why is this happening?",
141
  "Could you clarify the maximum number of cameras that can be connected simultaneously to the video input ports on the TDA2x SoC, considering it supports up to 10 multiplexed input ports and includes 3 dedicated video input modules?"]
142
 
 
13
 
14
  class DocumentRetrievalAndGeneration:
15
  def __init__(self, embedding_model_name, lm_model_id, data_folder, faiss_index_path):
16
+ self.all_splits = self.load_documents(data_folder)
17
  self.embeddings = SentenceTransformer(embedding_model_name)
18
  self.gpu_index = self.load_faiss_index(faiss_index_path)
19
  self.llm = self.initialize_llm(lm_model_id)
20
+ # self.all_splits = self.split_documents()
21
 
22
  def load_documents(self, folder_path):
23
  loader = DirectoryLoader(folder_path, loader_cls=TextLoader)
24
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=250)
25
  documents = loader.load()
26
+ all_splits = text_splitter.split_documents(documents)
27
  print('Length of documents:', len(documents))
28
+ print("LEN of all_splits", len(all_splits))
29
+ return all_splits
30
 
31
  def load_faiss_index(self, faiss_index_path):
32
  cpu_index = faiss.read_index(faiss_index_path)
 
54
  )
55
  return generate_text
56
 
 
 
 
 
 
 
57
  def query_and_generate_response(self, query):
58
  query_embedding = self.embeddings.encode(query, convert_to_tensor=True).cpu().numpy()
59
  distances, indices = self.gpu_index.search(np.array([query_embedding]), k=5)
 
62
  for idx in indices[0]:
63
  content += "-" * 50 + "\n"
64
  content += self.all_splits[idx].page_content + "\n"
65
+ print("CHUNK",idx)
66
  print(self.all_splits[idx].page_content)
67
  print("############################")
68
  prompt=f"""
 
134
  font-weight: bold; /* Make text bold */
135
  }
136
  """
137
+ EXAMPLES = ["Can the VIP and CSI2 modules operate simultaneously? ",
138
  "I'm using Code Composer Studio 5.4.0.00091 and enabled FPv4SPD16 floating point support for CortexM4 in TDA2. However, after building the project, the .asm file shows --float_support=vfplib instead of FPv4SPD16. Why is this happening?",
139
  "Could you clarify the maximum number of cameras that can be connected simultaneously to the video input ports on the TDA2x SoC, considering it supports up to 10 multiplexed input ports and includes 3 dedicated video input modules?"]
140