Deepakraj2006 committed on
Commit
0ccdb83
Β·
verified Β·
1 Parent(s): a800118

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -20
app.py CHANGED
@@ -41,33 +41,53 @@ def init_llm():
41
  )
42
 
43
 
 
 
44
  def process_document(file):
45
- """Process uploaded PDF and create a retriever"""
46
  global conversation_retrieval_chain
47
 
48
  if not llm_pipeline or not embeddings:
49
  init_llm()
50
 
51
- # Load PDF and split text
52
- loader = PyPDFLoader(file.name)
53
- documents = loader.load()
54
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
55
- texts = text_splitter.split_documents(documents)
56
-
57
- # Load or create ChromaDB
58
- if os.path.exists(persist_directory):
59
- db = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
60
- else:
61
- db = Chroma.from_documents(texts, embedding=embeddings, persist_directory=persist_directory)
62
-
63
- retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 6})
64
-
65
- # Initialize ConversationalRetrievalChain
66
- conversation_retrieval_chain = ConversationalRetrievalChain.from_llm(
67
- llm=llm_pipeline, retriever=retriever
68
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
- return "πŸ“„ PDF uploaded and processed successfully! You can now ask questions."
71
 
72
 
73
  def process_prompt(prompt, chat_history_display):
 
41
  )
42
 
43
 
44
+ import time
45
+
46
  def process_document(file):
 
47
  global conversation_retrieval_chain
48
 
49
  if not llm_pipeline or not embeddings:
50
  init_llm()
51
 
52
+ start_time = time.time()
53
+ print(f"πŸ“‚ Uploading PDF: {file.name}")
54
+
55
+ try:
56
+ # βœ… Ensure file is saved correctly
57
+ file_path = os.path.join("/tmp/uploads", file.name)
58
+ with open(file_path, "wb") as f:
59
+ f.write(file.read())
60
+ print(f"βœ… PDF saved at {file_path} in {time.time() - start_time:.2f}s")
61
+
62
+ # βœ… Load PDF
63
+ start_time = time.time()
64
+ loader = PyPDFLoader(file_path)
65
+ documents = loader.load()
66
+ print(f"βœ… PDF loaded in {time.time() - start_time:.2f}s")
67
+
68
+ # βœ… Split text
69
+ start_time = time.time()
70
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
71
+ texts = text_splitter.split_documents(documents)
72
+ print(f"βœ… Text split in {time.time() - start_time:.2f}s")
73
+
74
+ # βœ… Create ChromaDB
75
+ start_time = time.time()
76
+ db = Chroma.from_documents(texts, embedding=embeddings, persist_directory="/tmp/chroma_db")
77
+ print(f"βœ… ChromaDB created in {time.time() - start_time:.2f}s")
78
+
79
+ # βœ… Create retrieval chain
80
+ conversation_retrieval_chain = ConversationalRetrievalChain.from_llm(
81
+ llm=llm_pipeline, retriever=db.as_retriever()
82
+ )
83
+ print("βœ… Document processing complete!")
84
+
85
+ return "πŸ“„ PDF uploaded and processed successfully! You can now ask questions."
86
+
87
+ except Exception as e:
88
+ print(f"❌ Error processing PDF: {str(e)}")
89
+ return f"Error: {str(e)}"
90
 
 
91
 
92
 
93
  def process_prompt(prompt, chat_history_display):