sudip1987 committed
Commit cbab016 (verified)
Parent(s): 46d0a50

Update app.py

Files changed (1):
  app.py  +13 −3
app.py CHANGED

@@ -40,6 +40,15 @@ def get_text_splitter(strategy: str, chunk_size: int = 1024, chunk_overlap: int
     }
     return splitters.get(strategy)
 
+# def get_text_splitter(strategy, chunk_size=1024, chunk_overlap=64):
+#     if strategy == "recursive":
+#         return RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+#     elif strategy == "fixed":
+#         return CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+#     elif strategy == "token":
+#         return TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+#     return None
+
 def load_doc(list_file_path: List[str], splitting_strategy: str, chunk_size: str):
     chunk_size_value = CHUNK_SIZES[chunk_size][splitting_strategy]
     loaders = [PyPDFLoader(x) for x in list_file_path]
@@ -59,12 +68,13 @@ def create_db(splits, db_choice: str = "faiss"):
         "qdrant": lambda: Qdrant.from_documents(
             splits,
             embeddings,
-            location=":memory:",
+            location=":memory:",  # In-memory database for Qdrant
             collection_name="pdf_docs"
         )
     }
     return db_creators[db_choice]()
 
+# Initialize vector DB
 def initialize_database(list_file_obj, splitting_strategy, chunk_size, db_choice, progress=gr.Progress()):
     """Initialize vector database with error handling"""
     try:
@@ -77,7 +87,7 @@ def initialize_database(list_file_obj, splitting_strategy, chunk_size, db_choice
 
         doc_splits = load_doc(list_file_path, splitting_strategy, chunk_size)
         if not doc_splits:
-            return None, "No content extracted from documents."
+            return None, "No content extracted from documents."
 
         vector_db = create_db(doc_splits, db_choice)
         return vector_db, f"Database created successfully using {splitting_strategy} splitting and {db_choice} vector database!"
@@ -100,7 +110,7 @@ def initialize_llmchain(llm_choice, temperature, max_tokens, top_k, vector_db, p
         max_new_tokens=max_tokens,
         top_k=top_k
     )
-
+    # Temporary memory
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key='answer',
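
For context: the first hunk only comments out the older if/elif version of get_text_splitter(), while the live dict-based version is partly outside the hunk. A minimal sketch of what that function presumably looks like, assuming the LangChain splitter classes named in the commented-out block and its chunk_size/chunk_overlap defaults (the import path may differ in app.py):

from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
    CharacterTextSplitter,
    TokenTextSplitter,
)

def get_text_splitter(strategy: str, chunk_size: int = 1024, chunk_overlap: int = 64):
    # Map each strategy name to a configured splitter; unknown names yield None.
    splitters = {
        "recursive": RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap),
        "fixed": CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap),
        "token": TokenTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap),
    }
    return splitters.get(strategy)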
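
The second hunk's location=":memory:" argument makes the Qdrant store live entirely inside the Python process, so nothing is persisted between runs. A hedged sketch of that call in isolation; the sample document and HuggingFaceEmbeddings model are stand-ins, not necessarily what app.py configures:

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Qdrant
from langchain.schema import Document

doc_splits = [Document(page_content="example chunk of PDF text")]  # stand-in for load_doc() output
embeddings = HuggingFaceEmbeddings()  # assumed embedding model

vector_db = Qdrant.from_documents(
    doc_splits,
    embeddings,
    location=":memory:",        # in-process Qdrant client, nothing written to disk
    collection_name="pdf_docs",
)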
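
The last hunk labels the ConversationBufferMemory as temporary memory: chat history is held in RAM for the session and exposed to the chain under the "chat_history" key. A sketch of that construction; only memory_key and output_key are visible in the diff, so return_messages=True is an assumption:

from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(
    memory_key="chat_history",   # key under which the chain receives prior turns
    output_key="answer",         # chain output that gets appended to the buffer
    return_messages=True,        # assumption: not shown in the diff
)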