aakash0017 committed on
Commit
0adc74f
·
1 Parent(s): 9e27789

added rag documents

Browse files
Files changed (1) hide show
  1. conversation.py +5 -2
conversation.py CHANGED
@@ -135,6 +135,7 @@ import gradio as gr
135
  import time
136
  from db_func import insert_one
137
  from langchain.agents import AgentExecutor
 
138
 
139
 
140
  def get_bert_embeddings(sentence):
@@ -213,9 +214,11 @@ def run(input_):
213
  for i in range(len(sources)):
214
  temp = sources[i].replace('.pdf', '').replace('.txt', '').replace("AAO", "").replace("2022-2023", "").replace("data/book", "").replace("text", "").replace(" ", " ")
215
  source_text += f"{i+1}. {temp}\n"
216
- doc_text += f"{i+1}. {docs[i]}\n"
 
217
 
218
- output_text = f"{output_text} \n\nSources: \n{source_text}"
 
219
  # output_text = f"{output_text}"
220
 
221
  doc_to_insert = {
 
135
  import time
136
  from db_func import insert_one
137
  from langchain.agents import AgentExecutor
138
+ import re
139
 
140
 
141
  def get_bert_embeddings(sentence):
 
214
  for i in range(len(sources)):
215
  temp = sources[i].replace('.pdf', '').replace('.txt', '').replace("AAO", "").replace("2022-2023", "").replace("data/book", "").replace("text", "").replace(" ", " ")
216
  source_text += f"{i+1}. {temp}\n"
217
+ cleaned_text = re.sub(r'[^a-zA-Z0-9\s]', '', docs[i].strip().lower())
218
+ doc_text += f"{i+1}. {cleaned_text}\n"
219
 
220
+ # output_text = f"{output_text} \n\nSources: \n{source_text}"
221
+ output_text = f"{output_text} \n\nSources: \n{source_text}\n\nDocuments: \n{doc_text}"
222
  # output_text = f"{output_text}"
223
 
224
  doc_to_insert = {