Spaces:
Sleeping
Sleeping
arjunanand13
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -13,17 +13,20 @@ import gradio as gr
|
|
13 |
|
14 |
class DocumentRetrievalAndGeneration:
|
15 |
def __init__(self, embedding_model_name, lm_model_id, data_folder, faiss_index_path):
|
16 |
-
self.
|
17 |
self.embeddings = SentenceTransformer(embedding_model_name)
|
18 |
self.gpu_index = self.load_faiss_index(faiss_index_path)
|
19 |
self.llm = self.initialize_llm(lm_model_id)
|
20 |
-
self.all_splits = self.split_documents()
|
21 |
|
22 |
def load_documents(self, folder_path):
|
23 |
loader = DirectoryLoader(folder_path, loader_cls=TextLoader)
|
|
|
24 |
documents = loader.load()
|
|
|
25 |
print('Length of documents:', len(documents))
|
26 |
-
|
|
|
27 |
|
28 |
def load_faiss_index(self, faiss_index_path):
|
29 |
cpu_index = faiss.read_index(faiss_index_path)
|
@@ -51,12 +54,6 @@ class DocumentRetrievalAndGeneration:
|
|
51 |
)
|
52 |
return generate_text
|
53 |
|
54 |
-
def split_documents(self):
|
55 |
-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=250)
|
56 |
-
all_splits = text_splitter.split_documents(self.documents)
|
57 |
-
print("LEN of all_splits", len(all_splits))
|
58 |
-
return all_splits
|
59 |
-
|
60 |
def query_and_generate_response(self, query):
|
61 |
query_embedding = self.embeddings.encode(query, convert_to_tensor=True).cpu().numpy()
|
62 |
distances, indices = self.gpu_index.search(np.array([query_embedding]), k=5)
|
@@ -65,6 +62,7 @@ class DocumentRetrievalAndGeneration:
|
|
65 |
for idx in indices[0]:
|
66 |
content += "-" * 50 + "\n"
|
67 |
content += self.all_splits[idx].page_content + "\n"
|
|
|
68 |
print(self.all_splits[idx].page_content)
|
69 |
print("############################")
|
70 |
prompt=f"""
|
@@ -136,7 +134,7 @@ if __name__ == "__main__":
|
|
136 |
font-weight: bold; /* Make text bold */
|
137 |
}
|
138 |
"""
|
139 |
-
EXAMPLES = ["
|
140 |
"I'm using Code Composer Studio 5.4.0.00091 and enabled FPv4SPD16 floating point support for CortexM4 in TDA2. However, after building the project, the .asm file shows --float_support=vfplib instead of FPv4SPD16. Why is this happening?",
|
141 |
"Could you clarify the maximum number of cameras that can be connected simultaneously to the video input ports on the TDA2x SoC, considering it supports up to 10 multiplexed input ports and includes 3 dedicated video input modules?"]
|
142 |
|
|
|
13 |
|
14 |
class DocumentRetrievalAndGeneration:
|
15 |
def __init__(self, embedding_model_name, lm_model_id, data_folder, faiss_index_path):
    """Build the retrieval-and-generation pipeline.

    Args:
        embedding_model_name: Name passed to ``SentenceTransformer``.
        lm_model_id: Identifier passed to ``initialize_llm``.
        data_folder: Folder handed to ``load_documents``; the chunks it
            returns are stored as ``self.all_splits``.
        faiss_index_path: Path handed to ``load_faiss_index``.
    """
    # Chunks are loaded and split first; query_and_generate_response later
    # indexes self.all_splits with the ids returned by the FAISS search.
    # NOTE(review): presumably the chunk order must match the order used
    # when the FAISS index was built — confirm.
    self.all_splits = self.load_documents(data_folder)
    self.embeddings = SentenceTransformer(embedding_model_name)
    self.gpu_index = self.load_faiss_index(faiss_index_path)
    self.llm = self.initialize_llm(lm_model_id)
|
21 |
|
22 |
def load_documents(self, folder_path):
    """Load the files under *folder_path* and return them split into chunks.

    Files are read through ``DirectoryLoader`` using ``TextLoader`` and
    then cut with a ``RecursiveCharacterTextSplitter`` configured with
    ``chunk_size=5000`` and ``chunk_overlap=250``. The number of loaded
    documents and the number of resulting chunks are printed.

    Args:
        folder_path: Folder passed to ``DirectoryLoader``.

    Returns:
        The chunks produced by ``split_documents``.
    """
    directory_loader = DirectoryLoader(folder_path, loader_cls=TextLoader)
    splitter = RecursiveCharacterTextSplitter(chunk_size=5000, chunk_overlap=250)

    raw_docs = directory_loader.load()
    chunks = splitter.split_documents(raw_docs)

    print('Length of documents:', len(raw_docs))
    print("LEN of all_splits", len(chunks))
    return chunks
|
30 |
|
31 |
def load_faiss_index(self, faiss_index_path):
|
32 |
cpu_index = faiss.read_index(faiss_index_path)
|
|
|
54 |
)
|
55 |
return generate_text
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
def query_and_generate_response(self, query):
|
58 |
query_embedding = self.embeddings.encode(query, convert_to_tensor=True).cpu().numpy()
|
59 |
distances, indices = self.gpu_index.search(np.array([query_embedding]), k=5)
|
|
|
62 |
for idx in indices[0]:
|
63 |
content += "-" * 50 + "\n"
|
64 |
content += self.all_splits[idx].page_content + "\n"
|
65 |
+
print("CHUNK",idx)
|
66 |
print(self.all_splits[idx].page_content)
|
67 |
print("############################")
|
68 |
prompt=f"""
|
|
|
134 |
font-weight: bold; /* Make text bold */
|
135 |
}
|
136 |
"""
|
137 |
+
EXAMPLES = ["Can the VIP and CSI2 modules operate simultaneously? ",
|
138 |
"I'm using Code Composer Studio 5.4.0.00091 and enabled FPv4SPD16 floating point support for CortexM4 in TDA2. However, after building the project, the .asm file shows --float_support=vfplib instead of FPv4SPD16. Why is this happening?",
|
139 |
"Could you clarify the maximum number of cameras that can be connected simultaneously to the video input ports on the TDA2x SoC, considering it supports up to 10 multiplexed input ports and includes 3 dedicated video input modules?"]
|
140 |
|