andreasmartin committed on
Commit
78aafcc
·
1 Parent(s): 67bfb80

deepnote update

Browse files
Files changed (3) hide show
  1. app.py +1 -0
  2. faq.py +17 -6
  3. util.py +2 -2
app.py CHANGED
@@ -38,6 +38,7 @@ async def delete_vectordb_api():
38
 
39
 
40
  def ask(sheet_url: str, page_content_column: str, k: int, question: str):
 
41
  vectordb = faq.load_vectordb(sheet_url, page_content_column)
42
  result = faq.similarity_search(vectordb, question, k=k)
43
  return result
 
38
 
39
 
40
  def ask(sheet_url: str, page_content_column: str, k: int, question: str):
41
+ util.SPLIT_PAGE_BREAKS = False
42
  vectordb = faq.load_vectordb(sheet_url, page_content_column)
43
  result = faq.similarity_search(vectordb, question, k=k)
44
  return result
faq.py CHANGED
@@ -14,7 +14,8 @@ from enum import Enum
14
  EMBEDDING_MODEL_FOLDER = ".embedding-model"
15
  VECTORDB_FOLDER = ".vectordb"
16
  EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
17
- VECTORDB_TYPE = Enum("VECTORDB_TYPE", ["AwaDB", "Chroma"])
 
18
 
19
 
20
  def create_documents(df: pd.DataFrame, page_content_column: str) -> pd.DataFrame:
@@ -31,13 +32,18 @@ def define_embedding_function(model_name: str) -> HuggingFaceEmbeddings:
31
 
32
 
33
  def get_vectordb(
34
- faq_id: str, embedding_function: Embeddings, documents: List[Document] = None, vectordb_type: str = VECTORDB_TYPE.AwaDB
 
 
 
35
  ) -> VectorStore:
36
  vectordb = None
37
 
38
- if vectordb_type is VECTORDB_TYPE.AwaDB:
39
  if documents is None:
40
- vectordb = AwaDB(embedding=embedding_function, log_and_data_dir=VECTORDB_FOLDER)
 
 
41
  if not vectordb.load_local(table_name=faq_id):
42
  raise Exception("faq_id may not exists")
43
  else:
@@ -47,9 +53,13 @@ def get_vectordb(
47
  table_name=faq_id,
48
  log_and_data_dir=VECTORDB_FOLDER,
49
  )
50
- if vectordb_type is VECTORDB_TYPE.Chroma:
51
  if documents is None:
52
- vectordb = Chroma(collection_name=faq_id, embedding_function=embedding_function, persist_directory=VECTORDB_FOLDER)
 
 
 
 
53
  if not vectordb.get()["ids"]:
54
  raise Exception("faq_id may not exists")
55
  else:
@@ -79,6 +89,7 @@ def load_vectordb_id(
79
  try:
80
  vectordb = get_vectordb(faq_id=faq_id, embedding_function=embedding_function)
81
  except Exception as e:
 
82
  vectordb = create_vectordb_id(faq_id, page_content_column, embedding_function)
83
 
84
  return vectordb
 
14
  EMBEDDING_MODEL_FOLDER = ".embedding-model"
15
  VECTORDB_FOLDER = ".vectordb"
16
  EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
17
+ VECTORDB_TYPES = Enum("VECTORDB_TYPES", ["AwaDB", "Chroma"])
18
+ VECTORDB_TYPE = VECTORDB_TYPES.AwaDB
19
 
20
 
21
  def create_documents(df: pd.DataFrame, page_content_column: str) -> pd.DataFrame:
 
32
 
33
 
34
  def get_vectordb(
35
+ faq_id: str,
36
+ embedding_function: Embeddings,
37
+ documents: List[Document] = None,
38
+ vectordb_type: str = VECTORDB_TYPE,
39
  ) -> VectorStore:
40
  vectordb = None
41
 
42
+ if vectordb_type is VECTORDB_TYPES.AwaDB:
43
  if documents is None:
44
+ vectordb = AwaDB(
45
+ embedding=embedding_function, log_and_data_dir=VECTORDB_FOLDER
46
+ )
47
  if not vectordb.load_local(table_name=faq_id):
48
  raise Exception("faq_id may not exists")
49
  else:
 
53
  table_name=faq_id,
54
  log_and_data_dir=VECTORDB_FOLDER,
55
  )
56
+ if vectordb_type is VECTORDB_TYPES.Chroma:
57
  if documents is None:
58
+ vectordb = Chroma(
59
+ collection_name=faq_id,
60
+ embedding_function=embedding_function,
61
+ persist_directory=VECTORDB_FOLDER,
62
+ )
63
  if not vectordb.get()["ids"]:
64
  raise Exception("faq_id may not exists")
65
  else:
 
89
  try:
90
  vectordb = get_vectordb(faq_id=faq_id, embedding_function=embedding_function)
91
  except Exception as e:
92
+ print(e)
93
  vectordb = create_vectordb_id(faq_id, page_content_column, embedding_function)
94
 
95
  return vectordb
util.py CHANGED
@@ -68,6 +68,6 @@ def remove_duplicates_by_column(df, column):
68
 
69
 
70
  def dataframe_to_dict(df):
71
- df_records = df.to_dict(orient='records')
72
 
73
- return df_records
 
68
 
69
 
70
  def dataframe_to_dict(df):
71
+ df_records = df.to_dict(orient="records")
72
 
73
+ return df_records