Shreyas094 committed on
Commit
0d333d4
·
verified ·
1 Parent(s): f3c8ff5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -4
app.py CHANGED
@@ -66,9 +66,24 @@ def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[
66
  else:
67
  raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
68
 
 
69
  def get_embeddings():
70
  return HuggingFaceEmbeddings(model_name="avsolatorio/GIST-Embedding-v0")
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  # Add this at the beginning of your script, after imports
73
  DOCUMENTS_FILE = "uploaded_documents.json"
74
 
@@ -94,7 +109,7 @@ def update_vectors(files, parser):
94
  logging.warning("No files provided for update_vectors")
95
  return "Please upload at least one PDF file.", display_documents()
96
 
97
- embed = get_embeddings()
98
  total_chunks = 0
99
 
100
  all_data = []
@@ -148,7 +163,7 @@ def delete_documents(selected_docs):
148
  if not selected_docs:
149
  return "No documents selected for deletion.", display_documents()
150
 
151
- embed = get_embeddings()
152
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
153
 
154
  deleted_docs = []
@@ -322,7 +337,7 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
322
  # logging.info(f"Generated Response (first line): {first_line}")
323
  yield response
324
  else:
325
- embed = get_embeddings()
326
  if os.path.exists("faiss_database"):
327
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
328
  retriever = database.as_retriever(search_kwargs={"k": 20})
@@ -453,7 +468,7 @@ After writing the document, please provide a list of sources used in your respon
453
  def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
454
  logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
455
 
456
- embed = get_embeddings()
457
  if os.path.exists("faiss_database"):
458
  logging.info("Loading FAISS database")
459
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
 
66
  else:
67
  raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
68
 
69
+ # Function to get the embeddings model
70
  def get_embeddings():
71
  return HuggingFaceEmbeddings(model_name="avsolatorio/GIST-Embedding-v0")
72
 
73
+ # Function to encode text with specific financial focus
74
+ def get_embedding(text):
75
+ instruction = """
76
+ Encode this text with a focus on financial information.
77
+ Pay special attention to:
78
+ 1. Numerical data related to earnings, revenue, and other financial metrics
79
+ 2. Statements about company performance and future outlook
80
+ 3. Comparisons to previous periods or industry benchmarks
81
+ 4. Key financial terms and their context
82
+ Ignore general boilerplate text and focus on the substantive financial content.
83
+ """
84
+ embedding_model = get_embeddings()
85
+ return embedding_model.encode(instruction + "\n\n" + text)
86
+
87
  # Add this at the beginning of your script, after imports
88
  DOCUMENTS_FILE = "uploaded_documents.json"
89
 
 
109
  logging.warning("No files provided for update_vectors")
110
  return "Please upload at least one PDF file.", display_documents()
111
 
112
+ embed = get_embedding()
113
  total_chunks = 0
114
 
115
  all_data = []
 
163
  if not selected_docs:
164
  return "No documents selected for deletion.", display_documents()
165
 
166
+ embed = get_embedding()
167
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
168
 
169
  deleted_docs = []
 
337
  # logging.info(f"Generated Response (first line): {first_line}")
338
  yield response
339
  else:
340
+ embed = get_embedding()
341
  if os.path.exists("faiss_database"):
342
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
343
  retriever = database.as_retriever(search_kwargs={"k": 20})
 
468
  def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=0.2):
469
  logging.info(f"Entering get_response_from_pdf with query: {query}, model: {model}, selected_docs: {selected_docs}")
470
 
471
+ embed = get_embedding()
472
  if os.path.exists("faiss_database"):
473
  logging.info("Loading FAISS database")
474
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)